Bayesian Optimization using OPTIMEO

Single outcome

First, we generate some data

Let's create an experimental_data(temp, conc) function that simulates the yield of a chemical reaction based on temperature and concentration. The yield is a function of these two variables, and we will use Bayesian Optimization to find the position of the maximum in the minimum number of experiments.

In the following block, we just plot this function to see what it looks like (but in real life, you have no idea what the function looks like). We also create an experimental data set where we have already done some experiments (they are the white circles in the plot; the red cross marks the true maximum we are trying to find).

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

pio.renderers.default = "notebook"

def experimental_data(temp, conc):
    """
    This function simulates experimental data based on temperature and concentration.
    The function is a Gaussian-like function that peaks at certain temperature and concentration values.
    The function is not based on any real experimental data and is purely for demonstration purposes.
    """
    out = np.exp(-((temp - 50) ** 2)/1000)*np.exp(-((conc - .50) ** 2)/.05) - .9*np.exp(-((temp - 45) ** 2)/100)*np.exp(-((conc - .450) ** 2)/.05)
    return out

def generate_data(N=100):
    temp = np.linspace(0, 100, N)
    conc = np.linspace(0, 1, N)
    data = np.meshgrid(temp, conc)
    temp, conc = data[0].flatten(), data[1].flatten()
    exp_data = experimental_data(temp, conc)
    df = pd.DataFrame({'Temperature': temp, 'Concentration': conc, 'Yield': exp_data})
    return df

df = generate_data(500)
# Prepare data for the contour plot
unique_temps = np.unique(df['Temperature'])
unique_concs = np.unique(df['Concentration'])
Z = df.pivot(index='Concentration', columns='Temperature', values='Yield').values

# find the maximum yield and its position
max_yield = np.max(Z)
max_pos = np.unravel_index(np.argmax(Z), Z.shape)
max_temp = unique_temps[max_pos[1]]
max_conc = unique_concs[max_pos[0]]
# Create the contour plot
fig = go.Figure(data=[go.Contour(
    z=Z,
    x=unique_temps,
    y=unique_concs,
    colorscale='Viridis',
    contours=dict(coloring='heatmap', size=0.1),
    hovertemplate='Temperature: %{x}<br>Concentration: %{y}<br>Yield: %{z}<extra></extra>'
)])

# Update layout with legend at the top, below the title
fig.update_layout(
    xaxis_title='Temperature',
    yaxis_title='Concentration',
    legend=dict(
        x=0.5, # Center the legend horizontally
        y=1.05, # Place the legend just below the title
        orientation='h', # Horizontal orientation
        xanchor='center',
        yanchor='bottom'
    )
)

# Add a marker for the maximum yield
fig.add_trace(go.Scatter(
    x=[max_temp],
    y=[max_conc],
    mode='markers',
    marker=dict(size=10, color='red', symbol='x'),
    name='Max Yield that we want to determine through experiments',
    customdata=[[max_yield]], # Add max_yield as custom data
    hovertemplate='Max Yield:<br>Temperature: %{x}<br>Concentration: %{y}<br>Yield: %{customdata[0]}<extra></extra>'
))
# Create some sample data
df_sample = generate_data(2)

# Add the experimental data on the plot
fig.add_trace(go.Scatter(
    x=df_sample['Temperature'],
    y=df_sample['Concentration'],
    mode='markers',
    marker=dict(size=10, color='white', symbol='circle',
                line=dict(width=2, color='black')),
    name='Experimental measurements we have so far',
))
fig.show()

# Save the experimental data to a CSV file
df_sample.to_csv('experimental_data.csv', index=False)
Bayesian Optimization
Now, we will use the OPTIMEO package to perform Bayesian Optimization. First, we load the data in the right format.
from optimeo.bo import *
# there is only one outcome here, in the last column (-1)
features, outcomes = read_experimental_data('experimental_data.csv', out_pos=[-1])
print(f"Features:\n{features}")
print(f"Outcomes:\n{outcomes}")
Features:
{'Temperature': {'type': 'float', 'data': [0.0, 100.0, 0.0, 100.0], 'range': [np.float64(0.0), np.float64(100.0)]}, 'Concentration': {'type': 'float', 'data': [0.0, 0.0, 1.0, 1.0], 'range': [np.float64(0.0), np.float64(1.0)]}}
Outcomes:
{'Yield': {'type': 'float', 'data': [0.0005530843449776, 0.0005530843701466, 0.0005530843667414, 0.0005530843701476]}}
The ranges and types of the features are automatically determined from the data (see the data printed above). If you want to change the ranges or types, you can do so by editing the corresponding fields in the features dict or by providing a ranges dict. The ranges should be in the format {'feature_name': [min_value, max_value]}. If the ranges are not provided either in features or in ranges, they will be determined from the data.
ranges = {'Temperature': [-10, 100]}
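Equivalently, the same bounds can be set by editing the features dict in place (its structure matches the printout above); the snippet below is just a sketch of that alternative:

# Widen the Temperature bounds by editing the features dict directly
# (same effect as providing ranges={'Temperature': [-10, 100]})
features['Temperature']['range'] = [-10.0, 100.0]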
Now, let’s create the BOExperiment object. This object contains the data, the features, and the model. The model is a Gaussian Process with a Matern kernel.
bo = BOExperiment(
    features=features,
    outcomes=outcomes,
    # ranges=ranges,
    N = 1, # number of new points to generate
    maximize=True, # we want to maximize the response
    outcome_constraints=None,
    fixed_features=None, # fixed features are not used here
    # but they can be added as fixed_features = {'Temperature': 50, 'Concentration': 0.5}
    feature_constraints=None, # feature constraints are not used here
    # but they can be added as
    # feature_constraints = ['Concentration + Temperature <= 200']
    optim = 'sobol', # sobol is used to randomly generate the new points
    # to actually optimize, use optim = 'bo'
)
bo
BOExperiment(
N=1,
maximize={'Yield': True},
outcome_constraints=None,
feature_constraints=None,
optim=sobol
)
Input data:
Temperature Concentration Yield
0 0.0 0.0 0.000553
1 100.0 0.0 0.000553
2 0.0 1.0 0.000553
3 100.0 1.0 0.000553
new_points = bo.suggest_next_trials()
print(f"New points to sample:\n{new_points}")
fig = bo.plot_model()
fig.show()
New points to sample:
Temperature Concentration Predicted_Yield
0 35.034025 0.139437 0.000553
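In a real campaign, each suggestion is followed by an actual measurement and a call to update_experiment, as in this one-cycle sketch (here the "measurement" is just the simulated experimental_data function defined above):

new = bo.suggest_next_trials(with_predicted=False)
newT = new['Temperature'].values
newC = new['Concentration'].values
measured_yield = experimental_data(newT, newC)  # replace with your real measurement
bo.update_experiment(params={'Temperature': newT, 'Concentration': newC},
                     outcomes={'Yield': measured_yield})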
Now let’s do the optimization. We will first perform 6 random point generations, then we will use the Bayesian Optimization algorithm to find the maximum of the function. The algorithm will use the Gaussian Process model to predict the function value at each point and will use the expected improvement criterion to select the next point to evaluate.
for i in range(30): # let's do 30 iterations
    if i == 6:
        bo.optim = 'bo' # change to BO optimization after 6 iterations
    # simulate the new points
    new = bo.suggest_next_trials(with_predicted=False)
    newT = new['Temperature'].values
    newC = new['Concentration'].values
    # perform an experiment to measure the response at these points
    # here we just simulate the response using the experimental data function
    # in a real experiment, you would measure the response at these points
    # and add the new points to the experimental data
    measured_yield = experimental_data(newT, newC)
    # add the new points to the experimental data
    bo.update_experiment(params={'Temperature': newT, 'Concentration': newC},
                         outcomes={'Yield': measured_yield})
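For reference, the expected improvement criterion mentioned above has a closed form when the model posterior is Gaussian. The helper below is only an illustration of what the optimizer computes internally (OPTIMEO delegates this to its Ax/BoTorch backend); mu and sigma stand for the posterior mean and standard deviation of the model at a candidate point, and best_so_far is the best yield observed so far:

from scipy.stats import norm

def expected_improvement(mu, sigma, best_so_far):
    """Analytic expected improvement for maximization: EI = E[max(f - best_so_far, 0)]."""
    sigma = np.maximum(sigma, 1e-12)  # guard against zero predictive variance
    z = (mu - best_so_far) / sigma
    return (mu - best_so_far) * norm.cdf(z) + sigma * norm.pdf(z)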
Now let's plot the model:
bo.plot_model()
print(f"Best parameters from BO:")
print(bo.get_best_parameters())
print(f"Expected best parameters:")
print(pd.DataFrame({'Temperature': max_temp, 'Concentration': max_conc, 'Yield':max_yield}, index=[0]))
Best parameters from BO:
Temperature Concentration Yield
0 61.220096 0.508693 0.819884
Expected best parameters:
Temperature Concentration Yield
0 61.322645 0.503006 0.820258
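As a quick sanity check, the two can also be compared programmatically; this is just a sketch, assuming get_best_parameters() returns a single-row DataFrame like the one printed above:

best = bo.get_best_parameters()
print(f"Temperature error: {abs(best['Temperature'].iloc[0] - max_temp):.3f}")
print(f"Concentration error: {abs(best['Concentration'].iloc[0] - max_conc):.4f}")
print(f"Yield gap: {max_yield - best['Yield'].iloc[0]:.5f}")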
And the convergence plot: we see here that the maximum was found after the 13th iteration. You can play with the number of random points to see how it affects the convergence.
bo.plot_optimization_trace(optimum=max_yield)
Two outcomes
First, we generate some data
Like before, we will generate some data. We will use the same function for the yield, and add another function for the price of the experiment: we will want to maximize the yield and minimize the price. The price is a function of the temperature and concentration, but it is not a function of the yield.
from plotly.subplots import make_subplots

def price(temp, conc):
    """
    This function simulates the price of the experiment.
    """
    out = (np.exp(-((temp - 45) ** 2)/2000)*np.exp(-((conc - .350) ** 2)/.08) - 1.2*np.exp(-((temp - 55) ** 2)/150)*np.exp(-((conc - .250) ** 2)/.05))*100 + 150
    return out

def generate_data(N=100):
    temp = np.linspace(0, 100, N)
    conc = np.linspace(0, 1, N)
    data = np.meshgrid(temp, conc)
    temp, conc = data[0].flatten(), data[1].flatten()
    exp_data = experimental_data(temp, conc)
    price_data = price(temp, conc)
    df = pd.DataFrame({'Temperature': temp, 'Concentration': conc, 'Yield': exp_data, 'Price': price_data})
    return df

df = generate_data(500)
# Prepare data for the contour plots
unique_temps = np.unique(df['Temperature'])
unique_concs = np.unique(df['Concentration'])
Z = df.pivot(index='Concentration', columns='Temperature', values='Yield').values
ZZ = df.pivot(index='Concentration', columns='Temperature', values='Price').values

# find the maximum yield and its position
max_yield = np.max(Z)
max_pos = np.unravel_index(np.argmax(Z), Z.shape)
max_temp = unique_temps[max_pos[1]]
max_conc = unique_concs[max_pos[0]]

# min price
min_price = np.min(ZZ)
min_pos = np.unravel_index(np.argmin(ZZ), ZZ.shape)
minp_temp = unique_temps[min_pos[1]]
minp_conc = unique_concs[min_pos[0]]
# Create the subplots
fig = make_subplots(rows=1, cols=2, subplot_titles=('Yield', 'Price'))

# Add the contour plot for Yield (Z)
fig.add_trace(
    go.Contour(
        z=Z,
        x=unique_temps,
        y=unique_concs,
        colorscale='Viridis',
        contours=dict(coloring='heatmap', size=0.1),
        hovertemplate='Temperature: %{x}<br>Concentration: %{y}<br>Yield: %{z}<extra></extra>'
    ),
    row=1, col=1
)
# Add the contour plot for Price (ZZ)
fig.add_trace(
    go.Contour(
        z=ZZ,
        x=unique_temps,
        y=unique_concs,
        colorscale='Viridis',
        contours=dict(coloring='heatmap', size=0.1),
        hovertemplate='Temperature: %{x}<br>Concentration: %{y}<br>Price: %{z}<extra></extra>'
    ),
    row=1, col=2
)
# Update layout with legend at the top, below the title
fig.update_layout(
    xaxis_title='Temperature',
    yaxis_title='Concentration',
    xaxis2_title='Temperature',
    yaxis2_title='Concentration',
    legend=dict(
        x=0.5, # Center the legend horizontally
        y=1.05, # Place the legend just below the title
        orientation='h', # Horizontal orientation
        xanchor='center',
        yanchor='bottom'
    )
)
# Add a marker for the maximum yield
fig.add_trace(
    go.Scatter(
        x=[max_temp],
        y=[max_conc],
        mode='markers',
        marker=dict(size=10, color='red', symbol='x'),
        name='Max Yield',
        customdata=[[max_yield]], # Add max_yield as custom data
        hovertemplate='Max Yield:<br>Temperature: %{x}<br>Concentration: %{y}<br>Yield: %{customdata[0]}<extra></extra>'
    ),
    row=1, col=1
)
# Add a marker for the minimum price
fig.add_trace(
    go.Scatter(
        x=[minp_temp],
        y=[minp_conc],
        mode='markers',
        marker=dict(size=10, color='orange', symbol='x'),
        name='Min Price',
        customdata=[[min_price]], # Add min_price as custom data
        hovertemplate='Min Price:<br>Temperature: %{x}<br>Concentration: %{y}<br>Price: %{customdata[0]}<extra></extra>'
    ),
    row=1, col=2
)
# Create some sample data
df_sample = generate_data(2)

# Add the experimental data on the plot for Yield
fig.add_trace(
    go.Scatter(
        x=df_sample['Temperature'],
        y=df_sample['Concentration'],
        mode='markers',
        marker=dict(size=10, color='white', symbol='circle',
                    line=dict(width=2, color='black')),
        name='Experimental Measurements (Yield)',
    ),
    row=1, col=1
)

# Add the experimental data on the plot for Price
fig.add_trace(
    go.Scatter(
        x=df_sample['Temperature'],
        y=df_sample['Concentration'],
        mode='markers',
        marker=dict(size=10, color='white', symbol='circle',
                    line=dict(width=2, color='black')),
        name='Experimental Measurements (Price)',
    ),
    row=1, col=2
)

fig.show()

# Save the experimental data to a CSV file
df_sample.to_csv('experimental_data2.csv', index=False)
Let’s now read the experimental data and create our BOExperiment object. The data is in the same format as before, but we have two outcomes: yield and price. The features are the same as before, but we have to specify that we have two outcomes. We also have to specify the type of optimization: we want to maximize the yield and minimize the price.
features, outcomes = read_experimental_data('experimental_data2.csv', out_pos=[-2,-1])
print(f"- Features:\n{features}")
print(f"- Outcomes:\n{outcomes}")
- Features:
{'Temperature': {'type': 'float', 'data': [0.0, 100.0, 0.0, 100.0], 'range': [np.float64(0.0), np.float64(100.0)]}, 'Concentration': {'type': 'float', 'data': [0.0, 0.0, 1.0, 1.0], 'range': [np.float64(0.0), np.float64(1.0)]}}
- Outcomes:
{'Yield': {'type': 'float', 'data': [0.0005530843449776, 0.0005530843701466, 0.0005530843667414, 0.0005530843701476]}, 'Price': {'type': 'float', 'data': [157.85712040284733, 154.76553732340065, 150.18478176220222, 150.11207580199311]}}
bo = BOExperiment(
    features=features,
    outcomes=outcomes,
    N = 1, # number of new points to generate
    maximize={'Yield': True, 'Price': False}, # maximize the yield, minimize the price
    outcome_constraints=None,
    fixed_features=None, # fixed features are not used here
    # but they can be added as fixed_features = {'Temperature': 50, 'Concentration': 0.5}
    feature_constraints=None, # feature constraints are not used here
    # but they can be added as
    # feature_constraints = ['Concentration + Temperature <= 200']
    optim = 'sobol', # sobol is used to randomly generate the new points
)
Let’s do the optimization:
for i in range(50): # let's do 50 iterations
    if i == 6:
        bo.optim = 'bo' # change to BO optimization after 6 iterations
    # simulate the new points
    new = bo.suggest_next_trials(with_predicted=False)
    newT = new['Temperature'].values
    newC = new['Concentration'].values
    # perform an experiment to measure the response at these points
    # here we just simulate the response using the experimental data function
    # in a real experiment, you would measure the response at these points
    # and add the new points to the experimental data
    measured_yield = experimental_data(newT, newC)
    measured_price = price(newT, newC)
    # add the new points to the experimental data
    bo.update_experiment(params={'Temperature': newT, 'Concentration': newC},
                         outcomes={'Yield': measured_yield, 'Price': measured_price})
Now let’s plot the model:
figs = [bo.plot_model(metricname=mname) for mname in bo.out_names]
for fig in figs:
    fig.show()
The get_best_parameters() function returns the best parameters found so far. In the case of multiple outcomes, it returns a set of candidate points rather than a single optimum.
bo.get_best_parameters()
[INFO 04-17 10:14:59] ax.service.utils.best_point: Using inferred objective thresholds: [ObjectiveThreshold(Price <= 197.16387577527303), ObjectiveThreshold(Yield >= 0.010359901918938652)], as objective thresholds were not specified as part of the optimization configuration on the experiment.
| | Temperature | Concentration | Price | Yield |
| --- | --- | --- | --- | --- |
0 | 57.897013 | 0.331592 | 142.280413 | 0.404607 |
1 | 58.694735 | 0.339458 | 147.550978 | 0.445939 |
2 | 58.301005 | 0.356022 | 152.340603 | 0.488407 |
3 | 58.303991 | 0.314050 | 137.284781 | 0.361327 |
4 | 58.747299 | 0.366543 | 157.370179 | 0.530613 |
5 | 57.562162 | 0.305640 | 132.196824 | 0.321728 |
6 | 58.705903 | 0.381138 | 162.289523 | 0.573660 |
7 | 58.533775 | 0.283291 | 128.347919 | 0.280069 |
8 | 58.930060 | 0.394457 | 167.213936 | 0.617058 |
9 | 57.289987 | 0.285081 | 124.921503 | 0.261351 |
10 | 57.088387 | 0.274983 | 121.542106 | 0.232607 |
11 | 59.193792 | 0.408026 | 171.929492 | 0.659473 |
12 | 56.969153 | 0.263560 | 118.304074 | 0.204246 |
13 | 57.678023 | 0.434807 | 176.539257 | 0.687853 |
14 | 57.285682 | 0.247100 | 115.410368 | 0.175324 |
15 | 59.669112 | 0.437953 | 180.357960 | 0.737872 |
16 | 56.546327 | 0.237377 | 112.127660 | 0.145017 |
17 | 56.255879 | 0.220583 | 109.423382 | 0.113278 |
18 | 59.920463 | 0.466747 | 185.605804 | 0.787920 |
19 | 55.874548 | 0.202481 | 107.654696 | 0.083816 |
20 | 61.063312 | 0.488886 | 189.026678 | 0.818379 |
You can also plot the Pareto frontier, and then decide what is the best compromise you are prepared to make between the two outcomes. The Pareto frontier is the set of points that are not dominated by any other point. A point is dominated if there is another point that is better in both outcomes.
Here, you basically see the trade-off you face: either high yield at a high price, or lower yield at a lower price.
bo.plot_pareto_frontier()
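If you prefer numbers to the plot, the dominance rule above can be applied directly to the candidate set returned by get_best_parameters(). The sketch below assumes that call returns a pandas DataFrame with 'Yield' and 'Price' columns, as in the table printed earlier; the price budget of 150 is an arbitrary example of a compromise you might choose.

best = bo.get_best_parameters()

def non_dominated(df):
    """Keep only points for which no other point has both a higher yield and a lower price."""
    keep = []
    for i, row in df.iterrows():
        dominated = ((df['Yield'] >= row['Yield']) & (df['Price'] <= row['Price']) &
                     ((df['Yield'] > row['Yield']) | (df['Price'] < row['Price']))).any()
        if not dominated:
            keep.append(i)
    return df.loc[keep]

pareto = non_dominated(best)

# Example compromise: the highest yield among points below a price budget of 150
affordable = pareto[pareto['Price'] <= 150]
print(affordable.sort_values('Yield', ascending=False).head(1))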