9 Genetic Algorithm Overview
Objective functions can optionally take in step, budget, and generations.
step - The same objective function will be run evaluation_early_stop_steps times, and the current step is passed into the function as an integer. (This is useful, for example, for evaluating a single fold of cross-validation.)
budget - A parameter that varies over the course of the generations. It is passed into the objective function as a float between 0 and 1. If an individual's previous evaluation used a smaller budget than the current budget, the individual is re-evaluated. This is useful for using smaller datasets earlier in training.
generations - an int corresponding to the current generation number.
In [1]:
Copied!
#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot2.individual.BaseIndividual):
    """Individual that encodes a subset of items for the knapsack example.

    Attributes:
        values: set of every selectable item.
        subsets: set of the items currently selected.
        k: maximum number of items added or removed by one mutation.
        mutation_list: mutation operators tried (in random order) by `mutate`.
        crossover_list: crossover operators tried (in random order) by `crossover`.
    """

    def __init__(self,
                 values,
                 initial_set=None,
                 k=1,  # step size for shuffling
                 ):
        """Build the item pool and the starting selection.

        Args:
            values: an int n (items are 0..n-1) or an iterable of items.
            initial_set: optional iterable of items selected at start; when
                None, `k` items are drawn with replacement from the pool.
            k: step size used by the add/remove mutations.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Sample from the normalised pool rather than the raw argument so
            # that an integer `values` works here as well.
            self.subsets = set(random.choices(sorted(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation operator that succeeds.

        `rng` is accepted but not used; randomness comes from the global
        `random` module.

        Returns:
            True if an operator changed this individual, otherwise False.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng=None):
        """Apply the first crossover operator that succeeds against `ind2`.

        `rng` is accepted but not used; randomness comes from the global
        `random` module.

        Returns:
            True if an operator was applied, otherwise False.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self,):
        """Add up to `k` unselected items; return False if none are left."""
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
        return True

    def _mutate_remove(self,):
        """Remove up to `k` selected items, always keeping at least one.

        Returns:
            True if items were removed, False if only one (or no) item is
            selected and nothing can be removed.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        self.subsets = self.subsets - set(random.sample(list(self.subsets), k=n_remove))
        # Report success so `mutate` stops instead of trying another operator.
        return True

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items on which the two parents disagree.

        Every item held by exactly one of the two individuals is removed from
        both and then handed to one of them chosen at random. Both `self` and
        `ss2` are modified in place.

        Returns:
            False if the two selections are identical, otherwise True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return the sorted selection rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Endlessly yield fresh SubsetSelector individuals.

    Each individual's item pool is the index range 0..len(values)-1, where
    `values` is the module-level array of item values.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)
# Knapsack instance: one random value and one random weight per item.
values = np.random.randint(200, size=100)      # integer item values in [0, 200)
weights = np.random.random(len(values)) * 10   # float item weights in [0, 10), one per item
max_weight = 50                                # capacity checked by simple_objective
def simple_objective(ind, **kwargs):
    """Score an individual as (total value, total weight).

    Reads the module-level `values`, `weights` and `max_weight`. An empty
    selection scores (0, 0); a selection heavier than `max_weight` keeps its
    weight but has its value set to 0. Extra keyword arguments are ignored.
    """
    picked = np.array(list(ind.subsets))
    if picked.size == 0:
        return 0, 0

    weight_sum = weights[picked].sum()
    if weight_sum > max_weight:
        # Over capacity: the selection is worth nothing.
        return 0, weight_sum
    return values[picked].sum(), weight_sum
# simple_objective returns (value, weight), so names and weights follow that order.
objective_names = ["Value", "Weight"]
# NOTE(review): presumably a positive weight maximises and a negative weight
# minimises the objective -- confirm against the tpot2 documentation.
objective_function_weights = [1, -1]

evolver_settings = dict(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    objective_names=objective_names,
    generations=100,
    n_jobs=32,
    verbose=1,
)
evolver = tpot2.evolvers.BaseEvolver(**evolver_settings)
evolver.optimize()
#knapsack problem
import numpy as np
import tpot2
import random
import matplotlib.pyplot as plt
from dask.distributed import Client, LocalCluster
class SubsetSelector(tpot2.individual.BaseIndividual):
    """Individual that encodes a subset of items for the knapsack example.

    Attributes:
        values: set of every selectable item.
        subsets: set of the items currently selected.
        k: maximum number of items added or removed by one mutation.
        mutation_list: mutation operators tried (in random order) by `mutate`.
        crossover_list: crossover operators tried (in random order) by `crossover`.
    """

    def __init__(self,
                 values,
                 initial_set=None,
                 k=1,  # step size for shuffling
                 ):
        """Build the item pool and the starting selection.

        Args:
            values: an int n (items are 0..n-1) or an iterable of items.
            initial_set: optional iterable of items selected at start; when
                None, `k` items are drawn with replacement from the pool.
            k: step size used by the add/remove mutations.
        """
        if isinstance(values, int):
            self.values = set(range(0, values))
        else:
            self.values = set(values)

        if initial_set is None:
            # Sample from the normalised pool rather than the raw argument so
            # that an integer `values` works here as well.
            self.subsets = set(random.choices(sorted(self.values), k=k))
        else:
            self.subsets = set(initial_set)

        self.k = k

        self.mutation_list = [self._mutate_add, self._mutate_remove]
        self.crossover_list = [self._crossover_swap]

    def mutate(self, rng=None):
        """Apply the first mutation operator that succeeds.

        `rng` is accepted but not used; randomness comes from the global
        `random` module.

        Returns:
            True if an operator changed this individual, otherwise False.
        """
        mutation_list_copy = self.mutation_list.copy()
        random.shuffle(mutation_list_copy)
        for func in mutation_list_copy:
            if func():
                return True
        return False

    def crossover(self, ind2, rng=None):
        """Apply the first crossover operator that succeeds against `ind2`.

        `rng` is accepted but not used; randomness comes from the global
        `random` module.

        Returns:
            True if an operator was applied, otherwise False.
        """
        crossover_list_copy = self.crossover_list.copy()
        random.shuffle(crossover_list_copy)
        for func in crossover_list_copy:
            if func(ind2):
                return True
        return False

    def _mutate_add(self,):
        """Add up to `k` unselected items; return False if none are left."""
        not_included = list(self.values.difference(self.subsets))
        if not not_included:
            return False
        self.subsets.update(random.sample(not_included, k=min(self.k, len(not_included))))
        return True

    def _mutate_remove(self,):
        """Remove up to `k` selected items, always keeping at least one.

        Returns:
            True if items were removed, False if only one (or no) item is
            selected and nothing can be removed.
        """
        if len(self.subsets) <= 1:
            return False
        n_remove = min(self.k, len(self.subsets) - 1)
        self.subsets = self.subsets - set(random.sample(list(self.subsets), k=n_remove))
        # Report success so `mutate` stops instead of trying another operator.
        return True

    def _crossover_swap(self, ss2):
        """Randomly redistribute the items on which the two parents disagree.

        Every item held by exactly one of the two individuals is removed from
        both and then handed to one of them chosen at random. Both `self` and
        `ss2` are modified in place.

        Returns:
            False if the two selections are identical, otherwise True.
        """
        diffs = self.subsets.symmetric_difference(ss2.subsets)

        if len(diffs) == 0:
            return False
        for v in diffs:
            self.subsets.discard(v)
            ss2.subsets.discard(v)
            random.choice([self.subsets, ss2.subsets]).add(v)

        return True

    def unique_id(self):
        """Return the sorted selection rendered as a tuple string."""
        return str(tuple(sorted(self.subsets)))
def individual_generator():
    """Endlessly yield fresh SubsetSelector individuals.

    Each individual's item pool is the index range 0..len(values)-1, where
    `values` is the module-level array of item values.
    """
    while True:
        item_indices = np.arange(len(values))
        yield SubsetSelector(values=item_indices)
# Knapsack instance: one random value and one random weight per item.
values = np.random.randint(200, size=100)      # integer item values in [0, 200)
weights = np.random.random(len(values)) * 10   # float item weights in [0, 10), one per item
max_weight = 50                                # capacity checked by simple_objective
def simple_objective(ind, **kwargs):
    """Score an individual as (total value, total weight).

    Reads the module-level `values`, `weights` and `max_weight`. An empty
    selection scores (0, 0); a selection heavier than `max_weight` keeps its
    weight but has its value set to 0. Extra keyword arguments are ignored.
    """
    picked = np.array(list(ind.subsets))
    if picked.size == 0:
        return 0, 0

    weight_sum = weights[picked].sum()
    if weight_sum > max_weight:
        # Over capacity: the selection is worth nothing.
        return 0, weight_sum
    return values[picked].sum(), weight_sum
# simple_objective returns (value, weight), so names and weights follow that order.
objective_names = ["Value", "Weight"]
# NOTE(review): presumably a positive weight maximises and a negative weight
# minimises the objective -- confirm against the tpot2 documentation.
objective_function_weights = [1, -1]

evolver_settings = dict(
    individual_generator=individual_generator(),
    objective_functions=[simple_objective],
    objective_function_weights=objective_function_weights,
    bigger_is_better=True,
    population_size=100,
    objective_names=objective_names,
    generations=100,
    n_jobs=32,
    verbose=1,
)
evolver = tpot2.evolvers.BaseEvolver(**evolver_settings)
evolver.optimize()
Generation: 100%|██████████| 100/100 [02:15<00:00, 1.35s/it]
In [2]:
Copied!
# Build a separate results table. reset_index() without inplace=True leaves
# the evolver's own DataFrame untouched, so this cell can be re-run without
# accumulating extra index columns.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" objective.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']

print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(
    final_population_results.loc[best_idx, "Value"],
    final_population_results.loc[best_idx, "Weight"],
))
print()
print("All results")
final_population_results
# Build a separate results table. reset_index() without inplace=True leaves
# the evolver's own DataFrame untouched, so this cell can be re-run without
# accumulating extra index columns.
final_population_results = (
    evolver.population.evaluated_individuals
    .reset_index()
    .rename(columns={'index': 'Selected Index'})
)

# Row with the highest "Value" objective.
best_idx = final_population_results["Value"].idxmax()
best_individual = final_population_results.loc[best_idx, 'Individual']

print("best subset", best_individual.subsets)
print("Best value {0}, weight {1}".format(
    final_population_results.loc[best_idx, "Value"],
    final_population_results.loc[best_idx, "Weight"],
))
print()
print("All results")
final_population_results
best subset {3, 7, 8, 10, 13, 22, 31, 42, 43, 51, 57, 60, 64, 67, 68, 76, 80, 83, 97, 98} Best value 2925.0, weight 49.597868834152706 All results
Out[2]:
Selected Index | Value | Weight | Parents | Variation_Function | Individual | Generation | Submitted Timestamp | Completed Timestamp | Eval Error | Pareto_Front | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | (70,) | 44.0 | 0.834758 | NaN | NaN | <__main__.SubsetSelector object at 0x7b8da0987... | 0.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
1 | (42,) | 147.0 | 3.091616 | NaN | NaN | <__main__.SubsetSelector object at 0x7b8da0987... | 0.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
2 | (90,) | 95.0 | 6.653284 | NaN | NaN | <__main__.SubsetSelector object at 0x7b8da0985... | 0.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
3 | (94,) | 159.0 | 7.523552 | NaN | NaN | <__main__.SubsetSelector object at 0x7b8da0985... | 0.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
4 | (97,) | 184.0 | 2.483618 | NaN | NaN | <__main__.SubsetSelector object at 0x7b8da0987... | 0.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
9995 | (61, 71, 99) | 307.0 | 17.213843 | ((61,), (61,)) | ind_mutate | <__main__.SubsetSelector object at 0x7b8cd5553... | 99.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
9996 | (7, 8, 22, 36, 43, 51, 61, 67, 68, 75, 80, 98) | 1457.0 | 20.259947 | ((7, 8, 22, 36, 43, 51, 61, 67, 68, 80, 98), (... | ind_mutate | <__main__.SubsetSelector object at 0x7b8cd5553... | 99.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
9997 | (7, 8, 20, 22, 43, 61, 67, 98) | 997.0 | 14.617154 | ((7, 8, 22, 43, 61, 98), (7, 8, 22, 43, 61, 98)) | ind_mutate | <__main__.SubsetSelector object at 0x7b8cd5553... | 99.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
9998 | (7, 8, 22, 25, 36, 43, 51, 67, 78, 98) | 1167.0 | 10.815193 | ((7, 8, 22, 36, 43, 51, 67, 78, 98), (7, 8, 22... | ind_mutate | <__main__.SubsetSelector object at 0x7b8cd5553... | 99.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
9999 | (0, 7, 8, 22, 36, 43, 51, 67, 68, 98) | 1318.0 | 15.988821 | ((7, 8, 22, 36, 43, 51, 67, 68, 98), (7, 8, 22... | ind_mutate | <__main__.SubsetSelector object at 0x7b8cd5553... | 99.0 | 1.727561e+09 | 1.727561e+09 | None | NaN |
10000 rows × 11 columns
In [3]:
Copied!
from scipy.stats import binned_statistic_2d
# Bin every evaluated individual on the (weight, value) plane and colour each
# cell by the mean generation of the individuals that fall into it.
weight_max, value_max, n_edges = 100, 3000, 100

x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

x_bins = np.linspace(0, weight_max, n_edges)
y_bins = np.linspace(0, value_max, n_edges)
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))
im = ax1.imshow(
    ret.statistic.T,  # transposed so that weight runs along the horizontal axis
    origin='lower',
    extent=(0, weight_max, 0, value_max),
    vmin=0,
    vmax=100,
    aspect=.03,
)
ax1.set(xlabel="Weight", ylabel="Value", title="Binned Average Generation")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()
from scipy.stats import binned_statistic_2d
# Bin every evaluated individual on the (weight, value) plane and colour each
# cell by the mean generation of the individuals that fall into it.
weight_max, value_max, n_edges = 100, 3000, 100

x = final_population_results["Weight"]
y = final_population_results["Value"]
c = final_population_results["Generation"]

x_bins = np.linspace(0, weight_max, n_edges)
y_bins = np.linspace(0, value_max, n_edges)
ret = binned_statistic_2d(x, y, c, statistic=np.mean, bins=[x_bins, y_bins])

fig, ax1 = plt.subplots(1, 1, figsize=(12, 4))
im = ax1.imshow(
    ret.statistic.T,  # transposed so that weight runs along the horizontal axis
    origin='lower',
    extent=(0, weight_max, 0, value_max),
    vmin=0,
    vmax=100,
    aspect=.03,
)
ax1.set(xlabel="Weight", ylabel="Value", title="Binned Average Generation")

cbar = fig.colorbar(im)
cbar.set_label('Generation')
plt.tight_layout()