4 Symbolic Regression and Classification
The following configurations allow TPOT2 to learn a symbolic classification or regression model.
Leafs: Leaves can either select individual columns or output 1's or 0's.
Inner nodes: arithmetic operators
Root: logistic regression
Symbolic Classification
In [2]:
Copied!
import tpot2
import sklearn.datasets
from sklearn.linear_model import LogisticRegression
import numpy as np
from tpot2.builtin_modules import ZeroTransformer, OneTransformer
from tpot2.config.classifiers import params_LogisticRegression
root_config_dict = {LogisticRegression: params_LogisticRegression}
leaf_config_dict = ["feature_set_selector", {ZeroTransformer: {}, OneTransformer: {}}]
est = tpot2.TPOTEstimator(population_size=100,generations=50,
scorers=['roc_auc'],
scorers_weights=[1],
other_objective_functions=[tpot2.objectives.number_of_nodes_objective],
other_objective_functions_weights=[-1],
classification=True,
inner_config_dict= "arithmetic_transformer",
leaf_config_dict=leaf_config_dict,
root_config_dict=root_config_dict,
n_jobs=32,
verbose=1,
)
#load iris
scorer = sklearn.metrics.get_scorer('roc_auc_ovo')
X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=100, n_informative=6, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
est.fit(X_train, y_train)
print(scorer(est, X_test, y_test))
est.fitted_pipeline_.plot()
import tpot2
import sklearn.datasets
from sklearn.linear_model import LogisticRegression
import numpy as np
from tpot2.builtin_modules import ZeroTransformer, OneTransformer
from tpot2.config.classifiers import params_LogisticRegression
root_config_dict = {LogisticRegression: params_LogisticRegression}
leaf_config_dict = ["feature_set_selector", {ZeroTransformer: {}, OneTransformer: {}}]
est = tpot2.TPOTEstimator(population_size=100,generations=50,
scorers=['roc_auc'],
scorers_weights=[1],
other_objective_functions=[tpot2.objectives.number_of_nodes_objective],
other_objective_functions_weights=[-1],
classification=True,
inner_config_dict= "arithmetic_transformer",
leaf_config_dict=leaf_config_dict,
root_config_dict=root_config_dict,
n_jobs=32,
verbose=1,
)
#load iris
scorer = sklearn.metrics.get_scorer('roc_auc_ovo')
X, y = sklearn.datasets.make_classification(n_samples=1000, n_features=100, n_informative=6, n_redundant=0, n_repeated=0, n_classes=2, n_clusters_per_class=2, weights=None, flip_y=0.01, class_sep=1.0, hypercube=True, shift=0.0, scale=1.0, shuffle=True, random_state=None)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
est.fit(X_train, y_train)
print(scorer(est, X_test, y_test))
est.fitted_pipeline_.plot()
Generation: 100%|██████████| 50/50 [01:59<00:00, 2.39s/it]
0.8397174152569836
In [3]:
Copied!
# print all hyperparameters
for n in est.fitted_pipeline_.graph.nodes:
print(n, " : ", est.fitted_pipeline_.graph.nodes[n]['instance'])
# print all hyperparameters
for n in est.fitted_pipeline_.graph.nodes:
print(n, " : ", est.fitted_pipeline_.graph.nodes[n]['instance'])
LogisticRegression_1 : LogisticRegression(C=282.83015030119856, max_iter=1000, n_jobs=1, solver='sag') FeatureSetSelector_1 : FeatureSetSelector(name='50', sel_subset=[50]) FeatureSetSelector_2 : FeatureSetSelector(name='16', sel_subset=[16]) MaxTransformer_1 : MaxTransformer() LTTransformer_1 : LTTransformer() FeatureSetSelector_3 : FeatureSetSelector(name='42', sel_subset=[42]) FeatureSetSelector_4 : FeatureSetSelector(name='21', sel_subset=[21]) MaxTransformer_2 : MaxTransformer() LTTransformer_2 : LTTransformer() MulTransformer_1 : MulTransformer()
In [4]:
Copied!
pareto_front = est.evaluated_individuals[est.evaluated_individuals['Pareto_Front'] == 1]
#plot the pareto front of number_of_leaves_objective vs roc_auc_score
import matplotlib.pyplot as plt
plt.scatter(pareto_front['number_of_nodes_objective'], pareto_front['roc_auc_score'])
plt.xlabel('Number of Nodes')
plt.ylabel('roc_auc_score')
plt.show()
pareto_front = est.evaluated_individuals[est.evaluated_individuals['Pareto_Front'] == 1]
#plot the pareto front of number_of_leaves_objective vs roc_auc_score
import matplotlib.pyplot as plt
plt.scatter(pareto_front['number_of_nodes_objective'], pareto_front['roc_auc_score'])
plt.xlabel('Number of Nodes')
plt.ylabel('roc_auc_score')
plt.show()
Symbolic Regression
In [5]:
Copied!
import tpot2
import sklearn.datasets
from sklearn.linear_model import SGDRegressor
import numpy as np
from tpot2.builtin_modules import ZeroTransformer, OneTransformer
from tpot2.config.regressors import params_SGDRegressor
root_config_dict = {SGDRegressor: params_SGDRegressor}
leaf_config_dict = ["feature_set_selector", {ZeroTransformer: {}, OneTransformer: {}}]
est = tpot2.TPOTEstimator(population_size=100,generations=50,
scorers=['neg_mean_squared_error'],
scorers_weights=[1],
other_objective_functions=[tpot2.objectives.number_of_nodes_objective],
other_objective_functions_weights=[-1],
n_jobs=32,
classification=False,
inner_config_dict= "arithmetic_transformer",
leaf_config_dict=leaf_config_dict,
root_config_dict=root_config_dict,
verbose=1,
processes=False,
)
scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')
X, y = sklearn.datasets.make_regression(n_samples=1000, n_features=100, n_informative=6)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
est.fit(X_train, y_train)
print(scorer(est, X_test, y_test))
est.fitted_pipeline_.plot()
import tpot2
import sklearn.datasets
from sklearn.linear_model import SGDRegressor
import numpy as np
from tpot2.builtin_modules import ZeroTransformer, OneTransformer
from tpot2.config.regressors import params_SGDRegressor
root_config_dict = {SGDRegressor: params_SGDRegressor}
leaf_config_dict = ["feature_set_selector", {ZeroTransformer: {}, OneTransformer: {}}]
est = tpot2.TPOTEstimator(population_size=100,generations=50,
scorers=['neg_mean_squared_error'],
scorers_weights=[1],
other_objective_functions=[tpot2.objectives.number_of_nodes_objective],
other_objective_functions_weights=[-1],
n_jobs=32,
classification=False,
inner_config_dict= "arithmetic_transformer",
leaf_config_dict=leaf_config_dict,
root_config_dict=root_config_dict,
verbose=1,
processes=False,
)
scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')
X, y = sklearn.datasets.make_regression(n_samples=1000, n_features=100, n_informative=6)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
est.fit(X_train, y_train)
print(scorer(est, X_test, y_test))
est.fitted_pipeline_.plot()
Generation: 100%|██████████| 50/50 [02:24<00:00, 2.89s/it]
-53.572578179092396
In [6]:
Copied!
# print all hyperparameters
for n in est.fitted_pipeline_.graph.nodes:
print(n, " : ", est.fitted_pipeline_.graph.nodes[n]['instance'])
# print all hyperparameters
for n in est.fitted_pipeline_.graph.nodes:
print(n, " : ", est.fitted_pipeline_.graph.nodes[n]['instance'])
SGDRegressor_1 : SGDRegressor(alpha=1.6814005088136593e-05, eta0=0.6868335822696461, fit_intercept=False, l1_ratio=0.5144783118066449, learning_rate='constant', loss='huber', penalty='elasticnet', power_t=5.487407069184651) FeatureSetSelector_1 : FeatureSetSelector(name='34', sel_subset=[34]) FeatureSetSelector_2 : FeatureSetSelector(name='17', sel_subset=[17]) FeatureSetSelector_3 : FeatureSetSelector(name='16', sel_subset=[16]) FeatureSetSelector_4 : FeatureSetSelector(name='3', sel_subset=[3]) FeatureSetSelector_5 : FeatureSetSelector(name='19', sel_subset=[19]) ZeroTransformer_1 : ZeroTransformer()
In [7]:
Copied!
pareto_front = est.evaluated_individuals[est.evaluated_individuals['Pareto_Front'] == 1]
#plot the pareto front of number_of_leaves_objective vs roc_auc_score
import matplotlib.pyplot as plt
plt.scatter(pareto_front['number_of_nodes_objective'], pareto_front['mean_squared_error'])
plt.xlabel('Number of Nodes')
plt.ylabel('neg_mean_squared_error')
plt.show()
pareto_front = est.evaluated_individuals[est.evaluated_individuals['Pareto_Front'] == 1]
#plot the pareto front of number_of_leaves_objective vs roc_auc_score
import matplotlib.pyplot as plt
plt.scatter(pareto_front['number_of_nodes_objective'], pareto_front['mean_squared_error'])
plt.xlabel('Number of Nodes')
plt.ylabel('neg_mean_squared_error')
plt.show()