Source code for optimeo.bo

# Copyright (c) 2025 Colin BOUSIGE
# Contact: colin.bousige@cnrs.fr
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the MIT License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version. 

"""
This module provides a class for optimizing experiments using Bayesian Optimization (BO) with the [Ax platform](https://ax.dev/).
It includes methods for initializing the experiment, suggesting trials, predicting outcomes, and plotting results.

You can see an example notebook [here](../examples/bo.ipynb).

"""

import warnings
# Globally silence several warning categories for the rest of the process.
# These filters are installed before the third-party imports below, presumably
# to hide the warnings those libraries emit at import/run time.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
# NOTE(review): RuntimeError is an exception class, not a Warning subclass, so
# this filter presumably never matches any warning. `RuntimeWarning` was
# probably intended -- confirm before changing, since it alters what users see.
warnings.simplefilter(action='ignore', category=RuntimeError)

import numpy as np
import pandas as pd
import random
from janitor import clean_names
from typing import Any, Dict, List, Optional, Union, Tuple

from ax.adapter.registry import Generators
from ax.core.observation import ObservationFeatures
from ax.core.trial_status import TrialStatus
from ax.exceptions.core import DataRequiredError
from ax.generation_strategy.generation_node import GenerationStep
from ax.generation_strategy.generation_strategy import GenerationStrategy
from ax.analysis.plotly.sensitivity import SensitivityAnalysisPlot
from ax.plot.contour import interact_contour, plot_contour
from ax.plot.feature_importances import plot_feature_importance_by_feature_plotly
from ax.plot.pareto_frontier import plot_pareto_frontier
from ax.plot.pareto_utils import compute_posterior_pareto_frontier
from ax.plot.slice import plot_slice
from ax.service.ax_client import AxClient, ObjectiveProperties
from botorch.acquisition.analytic import *
import plotly.graph_objects as go
import plotly.express as px
import re
import matplotlib as mpl

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

class BOExperiment:
    """
    BOExperiment is a class designed to facilitate Bayesian Optimization experiments using the [Ax platform](https://ax.dev/).
    It encapsulates the experiment setup, including features, outcomes, constraints, and optimization methods.

    Example
    -------
    .. code-block:: python

        from optimeo.bo import BOExperiment, read_experimental_data
        features, outcomes = read_experimental_data('data.csv', out_pos=[-2, -1])
        experiment = BOExperiment(
            features,
            outcomes,
            N=5,
            maximize={'out1': True, 'out2': False},
        )
        experiment.suggest_next_trials()
        experiment.plot_model(metricname='out1')
        experiment.plot_optimization_trace()

    Parameters
    ----------
    features: Dict[str, Dict[str, Any]]
        A dictionary defining the features of the experiment, including their types and ranges.
        Each feature is represented as a dictionary with keys 'type', 'data', and 'range'.
        - 'type': The type of the feature ('int', 'float' or 'text').
        - 'data': The observed data for the feature.
        - 'range': The range of values for the feature.
    outcomes: Dict[str, Dict[str, Any]]
        A dictionary defining the outcomes of the experiment, including their types and observed data.
        Each outcome is represented as a dictionary with keys 'type' and 'data'.
        - 'type': The type of the outcome (e.g., 'int', 'float').
        - 'data': The observed data for the outcome.
    ranges: Optional[Dict[str, List[Any]]]
        A dictionary defining the ranges of the features. Default is `None`.
        If not provided, the ranges will be inferred from the features data.
        The ranges should be in the format `{'feature_name': [minvalue,maxvalue]}`.
    N: int
        The number of trials to suggest in each optimization step. Must be a positive integer.
    maximize: Union[bool, Dict[str, bool]]
        A boolean or dict indicating whether to maximize the outcomes in the form
        `{'outcome1':True, 'outcome2':False}`. If a single boolean is provided, it is
        applied to all outcomes. Default is `True`.
    fixed_features: Optional[Dict[str, Any]]
        A dictionary defining fixed features with their values. Default is `None`.
        If provided, the fixed features will be treated as fixed parameters in the generation process.
        The fixed features should be in the format `{'feature_name': value}`.
    outcome_constraints: Optional[List[str]]
        Constraints on the outcomes, specified as a list of strings (a single string is
        also accepted). Default is `None`. The strings are forwarded unchanged to Ax's
        `create_experiment(outcome_constraints=...)`.
    objective_thresholds: Optional[Dict[str, float]]
        Reference-point thresholds for multi-objective optimization, specified per outcome in
        the format ``{'outcome_name': threshold}``. Default is `None`.
        Setting these avoids Ax's default winsorization warning for multi-objective runs.
    feature_constraints: Optional[List[str]]
        Constraints on the features, specified as a list of strings such as
        `'feature1 + 2*feature2 >= 3.0'`. Default is `None`. The strings are forwarded
        unchanged to Ax's `create_experiment(parameter_constraints=...)`.
    optim: str
        The optimization method to use, either 'bo' for Bayesian Optimization or 'sobol'
        for Sobol sequence (case-insensitive). Default is 'bo'.
    acq_func: Optional[Dict[str, Any]]
        The acquisition function to use for the optimization process. It must be a dict with 2 keys:
        - `acqf`: the acquisition function class to use (e.g., `UpperConfidenceBound`),
        - `acqf_kwargs`: a dict of the kwargs to pass to the acquisition function class. (e.g. `{'beta': 0.1}`).

        If not provided, the default acquisition function is used
        (`LogExpectedImprovement` or `qLogExpectedImprovement` if N>1).
    seed: int
        Random seed applied to `random` and `numpy.random`, and passed to the generation
        steps that accept one. Default is 42.

    Attributes
    ----------
    features: Dict[str, Dict[str, Any]]
        A dictionary defining the features of the experiment, including their types and ranges.
    outcomes: Dict[str, Dict[str, Any]]
        A dictionary defining the outcomes of the experiment, including their types and observed data.
    N: int
        The number of trials to suggest in each optimization step. Must be a positive integer.
    maximize: Dict[str, bool]
        Mapping from outcome name to whether that outcome is maximized.
    outcome_constraints: Optional[List[str]]
        Constraints on the outcomes, stored as a list of strings (or `None`).
    feature_constraints: Optional[List[str]]
        Constraints on the features, stored as a list (or `None`).
    optim: str
        The optimization method to use, either 'bo' for Bayesian Optimization or 'sobol' for Sobol sequence.
    data: pd.DataFrame
        A DataFrame representing the current data in the experiment, including features and outcomes.
    acq_func: dict
        The acquisition function to use for the optimization process.
    generator_run:
        The generator run for the experiment, used to generate new candidates.
    model:
        The model used for predictions in the experiment.
    ax_client:
        The AxClient for the experiment, used to manage trials and data.
    gs:
        The generation strategy for the experiment, used to generate new candidates.
    parameters:
        The parameters for the experiment, including their types and ranges.
    names:
        The names of the features in the experiment.
    fixed_features:
        The fixed features for the experiment, used to generate new candidates.
    candidate:
        The candidate(s) suggested by the optimization process.

    Methods
    -------
    initialize_ax_client()
        Initialize AxClient with experiment parameters, objectives, and constraints.
    suggest_next_trials()
        Suggest next trial(s) from the current model and generation strategy.
    predict(params)
        Predict outcomes for a list of parameter dictionaries.
    update_experiment(params, outcomes)
        Update the experiment with new observations and refresh internal state.
    plot_model(metricname=None, slice_values=None, linear=False)
        Plot model predictions as slices or contours.
    plot_optimization_trace(optimum=None)
        Plot optimization progress over trials.
    plot_pareto_frontier()
        Plot Pareto frontier for multi-objective problems.
    get_best_parameters()
        Return best parameter set(s) and associated outcomes.
    clear_trials()
        Abandon the pending (candidate) trials.
    """

    def __init__(self,
                 features: Dict[str, Dict[str, Any]],
                 outcomes: Dict[str, Dict[str, Any]],
                 ranges: Optional[Dict[str, List[Any]]] = None,
                 N: int = 1,
                 maximize: Union[bool, Dict[str, bool]] = True,
                 fixed_features: Optional[Dict[str, Any]] = None,
                 outcome_constraints: Optional[List[str]] = None,
                 objective_thresholds: Optional[Dict[str, float]] = None,
                 feature_constraints: Optional[List[str]] = None,
                 optim: str = 'bo',
                 acq_func: Optional[Dict[str, Any]] = None,
                 seed: int = 42) -> None:
        # Most setters re-initialize Ax once this flag is True; keep it False
        # while the attributes are being populated for the first time.
        self._first_initialization_done = False
        # Order matters: `features` reads `ranges`, `fixed_features` reads
        # `names`, and `maximize`/`objective_thresholds` read `out_names`,
        # which the `outcomes` setter defines.
        self.ranges = ranges
        self.features = features
        self.names = list(self._features.keys())
        self.fixed_features = fixed_features
        self.outcomes = outcomes
        self.N = N
        self.maximize = maximize
        self.outcome_constraints = outcome_constraints
        self.objective_thresholds = objective_thresholds
        self.feature_constraints = feature_constraints
        self.optim = optim
        self.acq_func = acq_func
        self.seed = seed
        self.candidate = None
        """The candidate(s) suggested by the optimization process."""
        self.ax_client = None
        """Ax's client for the experiment."""
        self.model = None
        """Ax's Gaussian Process model."""
        self.parameters = None
        """Ax's parameters for the experiment."""
        self.generator_run = None
        """Ax's generator run for the experiment."""
        self.gs = None
        """Ax's generation strategy for the experiment."""
        self.initialize_ax_client()
        self.Nmetrics = len(self.ax_client.objective_names)
        """The number of metrics in the experiment."""
        self._first_initialization_done = True
        """To indicate that the first initialization is done so that we don't call `initialize_ax_client()` again."""
        self.pareto_frontier = None
        """The Pareto frontier for multi-objective optimization experiments."""

    @property
    def seed(self) -> int:
        """Random seed for reproducibility. Default is 42."""
        return self._seed

    @seed.setter
    def seed(self, value: int):
        """Set the random seed and re-seed `random` and `numpy.random`.

        A non-integer value is not fatal: a warning is emitted and the default
        seed 42 is used instead.
        """
        if isinstance(value, int):
            self._seed = value
        else:
            # The message promises a fallback, so warn instead of raising
            # (raising made the fallback assignment unreachable).
            # RuntimeWarning is used because this module globally ignores
            # UserWarning, which would make the message invisible.
            warnings.warn("Seed must be an integer. Using default seed 42.",
                          RuntimeWarning, stacklevel=2)
            self._seed = 42
        random.seed(self.seed)
        np.random.seed(self.seed)

    @property
    def features(self):
        """
        A dictionary defining the features of the experiment, including their types and ranges.

        Example
        -------
        .. code-block:: python

            features = {
                'feature1': {'type': 'int', 'data': [1, 2, 3], 'range': [1, 3]},
                'feature2': {'type': 'float', 'data': [0.1, 0.2, 0.3], 'range': [0.1, 0.3]},
                'feature3': {'type': 'text', 'data': ['A', 'B', 'C'], 'range': ['A', 'B', 'C']},
            }
        """
        return self._features

    @features.setter
    def features(self, value):
        """
        Set the features of the experiment with validation.

        The 'range' entry of each feature is filled in place in the given
        dictionary: from `self.ranges` when it has an entry for the feature,
        otherwise from the feature's data (unique values for 'text', min/max
        for 'int' and 'float'). With no data, an existing 'range' is kept
        (or an empty list is stored).
        """
        if not isinstance(value, dict):
            raise ValueError("features must be a dictionary")
        self._features = value
        for name, info in self._features.items():
            if self.ranges and name in self.ranges.keys():
                info['range'] = self.ranges[name]
                continue
            feature_data = info.get('data', [])
            # len() rather than truthiness: data may be a numpy array.
            if len(feature_data) == 0:
                info['range'] = info.get('range', [])
            elif info['type'] == 'text':
                info['range'] = list(set(feature_data))
            elif info['type'] == 'int':
                info['range'] = [int(np.min(feature_data)),
                                 int(np.max(feature_data))]
            elif info['type'] == 'float':
                info['range'] = [float(np.min(feature_data)),
                                 float(np.max(feature_data))]
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def ranges(self):
        """
        A dictionary defining the ranges of the features. Default is `None`.

        If not provided, the ranges will be inferred from the features data.
        The ranges should be in the format `{'feature_name': [minvalue,maxvalue]}`.
        """
        return self._ranges

    @ranges.setter
    def ranges(self, value):
        """
        Set the ranges of the features with validation.
        """
        if value is not None and not isinstance(value, dict):
            raise ValueError("ranges must be a dictionary")
        self._ranges = value

    @property
    def names(self):
        """
        The names of the features.
        """
        return self._names

    @names.setter
    def names(self, value):
        """
        Set the names of the features.
        """
        if not isinstance(value, list):
            raise ValueError("names must be a list")
        self._names = value

    @property
    def outcomes(self):
        """
        A dictionary defining the outcomes of the experiment, including their types and observed data.

        Example
        -------
        .. code-block:: python

            outcomes = {
                'outcome1': {'type': 'float', 'data': [0.1, 0.2, 0.3]},
                'outcome2': {'type': 'float', 'data': [1.0, 2.0, 3.0]},
            }
        """
        return self._outcomes

    @outcomes.setter
    def outcomes(self, value):
        """
        Set the outcomes of the experiment with validation.

        Also refreshes `out_names`, the list of outcome names.
        """
        if not isinstance(value, dict):
            raise ValueError("outcomes must be a dictionary")
        self._outcomes = value
        self.out_names = list(value.keys())
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def fixed_features(self):
        """
        A dictionary defining fixed features with their values. Default is `None`.

        If provided, the fixed features will be treated as fixed parameters in the generation process.
        The fixed features should be in the format `{'feature_name': value}`.
        The getter returns the stored `ObservationFeatures` object (or `None`).
        """
        return self._fixed_features

    @fixed_features.setter
    def fixed_features(self, value):
        """
        Set the fixed features of the experiment.

        Raises `ValueError` if `value` is not a dict or names an unknown feature.
        """
        self._fixed_features = None
        if value is not None:
            if not isinstance(value, dict):
                raise ValueError("fixed_features must be a dictionary")
            for name in value.keys():
                if name not in self.names:
                    raise ValueError(f"Fixed feature '{name}' not found in features")
            # fixed_features should be an ObservationFeatures object
            self._fixed_features = ObservationFeatures(parameters=value)
        if self._first_initialization_done:
            self.set_gs()

    @property
    def N(self):
        """
        The number of trials to suggest in each optimization step. Must be a positive integer. Default is `1`.
        """
        return self._N

    @N.setter
    def N(self, value):
        """
        Set the number of trials to suggest in each optimization step with validation.
        """
        if not isinstance(value, int) or value <= 0:
            raise ValueError("N must be a positive integer")
        self._N = value
        if self._first_initialization_done:
            self.set_gs()

    @property
    def maximize(self):
        """
        A boolean or dict indicating whether to maximize outcomes in the form
        ``{'outcome1': True, 'outcome2': False}``. If a single boolean is provided,
        it is applied to all outcomes. Default is ``True``.
        """
        return self._maximize

    @maximize.setter
    def maximize(self, value):
        """
        Set the maximization setting for the outcomes with validation.

        A boolean is expanded to one entry per outcome. A dict must have as many
        entries as there are outcomes; entries whose key is not an outcome name
        or whose value is not a boolean are dropped.
        """
        if isinstance(value, bool):
            self._maximize = {out: value for out in self.out_names}
        elif isinstance(value, dict) and len(value) == len(self._outcomes):
            self._maximize = {k: v for k, v in value.items()
                              if (k in self.out_names and isinstance(v, bool))}
        else:
            raise ValueError("maximize must be a boolean or a dict of booleans "
                             "with one entry per outcome")
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def outcome_constraints(self):
        """
        Constraints on the outcomes, specified as a list of strings. Default is `None`.
        """
        return self._outcome_constraints

    @outcome_constraints.setter
    def outcome_constraints(self, value):
        """
        Set the outcome constraints of the experiment.

        A single string is wrapped in a list, a list is stored as is, and any
        other value resets the constraints to `None`.
        """
        if isinstance(value, str):
            self._outcome_constraints = [value]
        elif isinstance(value, list):
            self._outcome_constraints = value
        else:
            self._outcome_constraints = None
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def objective_thresholds(self):
        """
        Reference-point thresholds for multi-objective optimization.

        Format: ``{'outcome_name': threshold}``.
        """
        return self._objective_thresholds

    @objective_thresholds.setter
    def objective_thresholds(self, value):
        """
        Set objective thresholds with validation.

        Thresholds are stored as floats. Raises `ValueError` for a non-dict
        value, an unknown outcome name, or a non-numeric threshold.
        """
        if value is None:
            self._objective_thresholds = None
        elif isinstance(value, dict):
            validated = {}
            for name, threshold in value.items():
                if name not in self.out_names:
                    raise ValueError(f"Objective threshold provided for unknown outcome '{name}'")
                if not isinstance(threshold, (int, float)):
                    raise ValueError(f"Objective threshold for '{name}' must be a number")
                validated[name] = float(threshold)
            self._objective_thresholds = validated
        else:
            raise ValueError("objective_thresholds must be a dictionary or None")
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def feature_constraints(self):
        """
        Constraints on the features, specified as a list of strings. Default is `None`.

        Example
        -------
        .. code-block:: python

            feature_constraints = [
                'feature1 <= 10.0',
                'feature1 + 2*feature2 >= 3.0',
            ]
        """
        return self._feature_constraints

    @feature_constraints.setter
    def feature_constraints(self, value):
        """
        Set the feature constraints of the experiment.

        A single dict or string is wrapped in a list, a list is stored as is,
        and any other value resets the constraints to `None`.
        """
        if isinstance(value, (dict, str)):
            self._feature_constraints = [value]
        elif isinstance(value, list):
            self._feature_constraints = value
        else:
            self._feature_constraints = None
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def optim(self):
        """
        The optimization method to use, either `'bo'` for Bayesian Optimization or `'sobol'` for Sobol sequence. Default is `'bo'`.
        """
        return self._optim

    @optim.setter
    def optim(self, value):
        """
        Set the optimization method with validation (case-insensitive).
        """
        # Non-string input gets the same ValueError as an unknown method
        # instead of an AttributeError from `.lower()`.
        if not isinstance(value, str) or value.lower() not in ['bo', 'sobol']:
            raise ValueError("Optimization method must be either 'bo' or 'sobol'")
        self._optim = value.lower()
        if self._first_initialization_done:
            self.set_gs()

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns a DataFrame of the current data in the experiment, including features and outcomes.
        """
        feature_data = {name: info['data'] for name, info in self._features.items()}
        outcome_data = {name: info['data'] for name, info in self._outcomes.items()}
        data_dict = {**feature_data, **outcome_data}
        return pd.DataFrame(data_dict)

    @data.setter
    def data(self, value: pd.DataFrame):
        """
        Sets the features and outcomes data from a given DataFrame.

        Only columns whose name matches a known feature or outcome are used.
        """
        if not isinstance(value, pd.DataFrame):
            raise ValueError("Data must be a pandas DataFrame")
        feature_columns = [col for col in value.columns if col in self._features]
        outcome_columns = [col for col in value.columns if col in self._outcomes]
        for col in feature_columns:
            self._features[col]['data'] = value[col].tolist()
        for col in outcome_columns:
            self._outcomes[col]['data'] = value[col].tolist()
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def pareto_frontier(self):
        """
        The Pareto frontier for multi-objective optimization experiments.
        """
        return self._pareto_frontier

    @pareto_frontier.setter
    def pareto_frontier(self, value):
        """
        Set the Pareto frontier of the experiment.
        """
        self._pareto_frontier = value

    @property
    def acq_func(self):
        """
        The acquisition function to use for the optimization process. It must be a dict with 2 keys:

        - `acqf`: the acquisition function class to use (e.g., `UpperConfidenceBound`),
        - `acqf_kwargs`: a dict of the kwargs to pass to the acquisition function class. (e.g. `{'beta': 0.1}`).

        If not provided, the default acquisition function is used
        (`LogExpectedImprovement` or `qLogExpectedImprovement` if N>1).

        Example
        -------
        .. code-block:: python

            acq_func = {
                'acqf': UpperConfidenceBound,
                'acqf_kwargs': {'beta': 0.1}, # lower = exploitation, higher = exploration
            }
        """
        return self._acq_func

    @acq_func.setter
    def acq_func(self, value):
        """
        Set the acquisition function (stored as given, without validation).
        """
        self._acq_func = value
        if self._first_initialization_done:
            self.set_gs()

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        """
        Return a string representation of the BOExperiment instance.
        """
        return f"""
BOExperiment(
    N={self.N},
    maximize={self.maximize},
    outcome_constraints={self.outcome_constraints},
    feature_constraints={self.feature_constraints},
    optim={self.optim}
)

Input data:

{self.data}
        """
def initialize_ax_client(self):
    """
    Initialize the AxClient with the experiment's parameters, objectives, and constraints.

    A new `AxClient` experiment is created (overwriting any existing one),
    every stored observation is attached and completed as a trial, and the
    model and generation strategy are then rebuilt through `set_model()` and
    `set_gs()`.

    The number of stored observations is taken from the first outcome's data;
    every feature and outcome is expected to hold at least that many values.
    """
    print('\n======== INITIALIZING MODEL ========\n')
    self.ax_client = AxClient(verbose_logging=False,
                              suppress_storage_errors=True)

    # Translate the feature descriptions into Ax parameter definitions.
    # Features with a type other than 'text', 'int' or 'float' are skipped.
    self.parameters = []
    for name, info in self._features.items():
        if info['type'] == 'text':
            values = [str(val) for val in info['range']]
            self.parameters.append({
                "name": name,
                "type": "choice",
                "values": values,
                "value_type": "str",
                "is_ordered": len(values) == 2,
                "sort_values": False})
        elif info['type'] == 'int':
            self.parameters.append({
                "name": name,
                "type": "range",
                "bounds": [int(np.min(info['range'])),
                           int(np.max(info['range']))],
                "value_type": "int"})
        elif info['type'] == 'float':
            self.parameters.append({
                "name": name,
                "type": "range",
                "bounds": [float(np.min(info['range'])),
                           float(np.max(info['range']))],
                "value_type": "float"})

    # One objective per known outcome, with its optional threshold.
    objectives = {}
    for k, v in self._maximize.items():
        if isinstance(v, bool) and k in self._outcomes.keys():
            threshold = None
            if self._objective_thresholds is not None:
                threshold = self._objective_thresholds.get(k)
            objectives[k] = ObjectiveProperties(minimize=not v,
                                                threshold=threshold)

    self.ax_client.create_experiment(
        name="bayesian_optimization",
        parameters=self.parameters,
        objectives=objectives,
        parameter_constraints=self._feature_constraints,
        outcome_constraints=self._outcome_constraints,
        overwrite_existing_experiment=True
    )

    n_observations = len(next(iter(self._outcomes.values()))['data'])
    for i in range(n_observations):
        params = {name: info['data'][i] for name, info in self._features.items()}
        outcomes = {name: info['data'][i] for name, info in self._outcomes.items()}
        # Complete the trial under the index Ax actually assigned instead of
        # assuming it equals the loop counter.
        _, trial_index = self.ax_client.attach_trial(params)
        self.ax_client.complete_trial(trial_index=trial_index, raw_data=outcomes)

    self.set_model()
    self.set_gs()
def _has_completed_data(self) -> bool: """Return whether the current Ax experiment has usable completed data.""" if self.ax_client is None or self.ax_client.experiment is None: return False data = self.ax_client.experiment.fetch_data() if data is None: return False df = getattr(data, "df", None) return df is not None and not df.empty
def set_model(self):
    """
    Build the model used for predictions and store it in `self.model`.

    `self.model` is set to `None` when the experiment has no data yet, or
    when Ax raises `DataRequiredError` while building the model; otherwise it
    holds the `Generators.BOTORCH_MODULAR` adapter built from the experiment
    and its fetched data.
    """
    fitted = None
    if self._has_completed_data():
        experiment = self.ax_client.experiment
        try:
            fitted = Generators.BOTORCH_MODULAR(
                experiment=experiment,
                data=experiment.fetch_data()
            )
        except DataRequiredError:
            fitted = None
    self.model = fitted
def set_gs(self):
    """
    Build the generation strategy and generate the next generator run.

    Pending trials are cleared first. A single-step strategy is then chosen:

    - `'sobol'`, or `'bo'` without a usable model: a seeded, deduplicating
      Sobol step;
    - `'bo'` with a model and no `acq_func`: a BoTorch step with default
      settings;
    - `'bo'` with a model and an `acq_func`: a BoTorch step configured with
      the seed and the given acquisition class and options.

    Finally `N` candidates are generated with the fixed features applied and
    the first generator run of the result is stored in `self.generator_run`.
    """
    self.clear_trials()

    def sobol_step():
        # Quasi-random step, also used as fallback while no model can be fitted.
        return GenerationStep(
            generator=Generators.SOBOL,
            num_trials=-1,  # no limit on the number of trials from this step
            should_deduplicate=True,
            model_kwargs={"seed": self.seed},
            model_gen_kwargs={},
        )

    step = None
    if self._optim == 'sobol':
        step = sobol_step()
    elif self._optim == 'bo':
        if not self.model:
            self.set_model()
        if self.model is None:
            step = sobol_step()
        else:
            step_kwargs = {
                "generator": Generators.BOTORCH_MODULAR,
                "num_trials": -1,      # no limit on the number of trials from this step
                "max_parallelism": 3,  # parallelism limit for this step
            }
            if self.acq_func is not None:
                step_kwargs["model_kwargs"] = {
                    "seed": self.seed,
                    "botorch_acqf_class": self.acq_func['acqf'],
                    "botorch_acqf_options": self.acq_func['acqf_kwargs'],
                }
            step = GenerationStep(**step_kwargs)

    # An unrecognised method leaves the current strategy untouched.
    if step is not None:
        self.gs = GenerationStrategy(steps=[step])

    candidates = self.gs.gen(
        experiment=self.ax_client.experiment,
        data=None,
        n=self._N,
        fixed_features=self._fixed_features,
        pending_observations=None,
    )
    self.generator_run = candidates[0][0]
def clear_trials(self):
    """
    Abandon every trial of the experiment that is still a candidate.

    Only trials whose status is `TrialStatus.CANDIDATE` are marked as
    abandoned; trials in any other status are left untouched.
    """
    experiment = self.ax_client.experiment
    # Collect the indices first, then mark, so the status scan is complete
    # before any trial is modified.
    stale_indices = []
    for index, trial in experiment.trials.items():
        if trial.status == TrialStatus.CANDIDATE:
            stale_indices.append(index)
    for index in stale_indices:
        self.ax_client.experiment.trials[index].mark_abandoned()
def suggest_next_trials(self, with_predicted=True):
    """
    Suggest the next set of trials based on the current model and optimization strategy.

    A new trial (`N == 1`) or batch trial (`N > 1`) is created from the
    stored generator run and kept in `self.candidate`.

    Parameters
    ----------
    with_predicted : bool
        When `True` (default) and a model is available, the predicted mean of
        each outcome is appended in `Predicted_<outcome>` columns.

    Returns
    -------
    pd.DataFrame:
        One row per suggested arm, with the feature columns in the order of
        `self.names`, followed by the predicted outcomes when requested and
        available.
    """
    # The client must exist before pending trials can be cleared.
    if self.ax_client is None:
        self.initialize_ax_client()
    self.clear_trials()

    experiment = self.ax_client.experiment
    if self._N == 1:
        self.candidate = experiment.new_trial(self.generator_run)
    else:
        self.candidate = experiment.new_batch_trial(self.generator_run)

    if hasattr(self.candidate, "arms"):
        arm_parameters = [arm.parameters for arm in self.candidate.arms]
    else:
        arm_parameters = [self.candidate.arm.parameters]

    trials = pd.DataFrame(arm_parameters)[list(self.names)].reset_index(drop=True)
    if not with_predicted or self.model is None:
        return trials

    # Predict from the arms' own parameter values: rebuilding them from
    # DataFrame rows would upcast e.g. integer parameters to float.
    topred = [{name: arm[name] for name in self.names} for arm in arm_parameters]
    preds = pd.DataFrame(self.predict(topred)[0])
    preds.columns = [f'Predicted_{col}' for col in preds.columns]
    return pd.concat([trials, preds.reset_index(drop=True)], axis=1)
def _get_observed_best_parameters(self):
    """
    Return the best observed rows when no fitted Ax model is available yet.

    Only outcomes whose entry in ``self._maximize`` is a bool are treated as
    objectives. Rows with a missing objective value are ignored. With a single
    objective the one row holding its maximum (or minimum) is returned; with
    several objectives all remaining rows are returned.
    """
    observed = self.data.copy()
    objectives = [
        name for name, flag in self._maximize.items() if isinstance(flag, bool)
    ]
    if len(objectives) == 0:
        return pd.DataFrame()

    observed = observed.dropna(subset=objectives)
    if observed.empty:
        return pd.DataFrame(columns=self.data.columns)

    if len(objectives) != 1:
        return observed.reset_index(drop=True)

    target = objectives[0]
    column = observed[target]
    best_label = column.idxmax() if self._maximize[target] else column.idxmin()
    return observed.loc[[best_label]].reset_index(drop=True)
def predict(self, params):
    """
    Predict the outcomes for a given set of parameters using the current model.

    Parameters
    ----------
    params : List[Dict[str, Any]]
        List of parameter dictionaries for which to predict outcomes.

    Returns
    -------
    Tuple[List[Dict[str, float]], List[Dict[str, float]]]:
        Two lists with one dictionary per entry of ``params``: the predicted
        mean of every metric, and the matching standard deviation (square
        root of the metric's own variance term).

    Raises
    ------
    ValueError
        If no model is available.
    """
    if self.ax_client is None:
        self.initialize_ax_client()
    if self.model is None:
        raise ValueError("Predictions require at least one completed experiment with numeric outcome data.")

    features = [ObservationFeatures(parameters=point) for point in params]
    means, covariances = self.model.predict(features)

    preds = []
    stderrs = []
    for row in range(len(features)):
        preds.append({metric: means[metric][row] for metric in means.keys()})
        stderrs.append(
            {
                # Diagonal covariance term = variance of the metric itself.
                metric: np.sqrt(covariances[metric][metric][row])
                for metric in means.keys()
            }
        )
    return preds, stderrs
def update_experiment(self, params, outcomes):
    """
    Update the experiment with new parameters and outcomes, and reinitialize the AxClient.

    Parameters
    ----------
    params : Dict[str, Any]
        New parameter values keyed by feature name. Each value may be a
        scalar, a list or a numpy array.
    outcomes : Dict[str, Any]
        New outcome values keyed by outcome name, in the same formats.

    Raises
    ------
    ValueError
        If a key of ``params`` is not a known feature or a key of
        ``outcomes`` is not a known outcome. Nothing is modified in that case.
    """
    # Validate every name up front so that a bad key cannot leave the stored
    # features and outcomes partially updated (and of different lengths).
    for name in params:
        if name not in self._features:
            raise ValueError(f"Parameter '{name}' not found in features")
    for name in outcomes:
        if name not in self._outcomes:
            raise ValueError(f"Outcome '{name}' not found in outcomes")

    for store, updates in ((self._features, params), (self._outcomes, outcomes)):
        for name, values in updates.items():
            if isinstance(values, np.ndarray):
                values = values.tolist()
            if not isinstance(values, list):
                # Scalars (including 0-d arrays) become one-element lists.
                values = [values]
            store[name]['data'] += values

    self.initialize_ax_client()
def plot_model(self, metricname=None, slice_values=None, linear=False):
    """
    Plot the model's predictions for the experiment's parameters and outcomes.

    Parameters
    ----------
    metricname : Optional[str]
        The name of the metric to plot. If None, the first objective name of
        the Ax client is used.
    slice_values : Optional[Dict[str, Any]]
        Values at which the parameters that are not plotted are held fixed.
        ``None`` (default) means no slicing.
    linear : bool
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    plotly.graph_objects.Figure:
        Plotly figure of the model's predictions. When no model is available,
        a scatter view of the observed data is returned instead (or an empty
        figure if the metric or numeric parameters are missing).
    """
    # A fresh dict per call: avoids a shared mutable default and lets callers
    # pass None explicitly.
    slice_values = {} if slice_values is None else slice_values

    if self.ax_client is None:
        self.initialize_ax_client()
    self.suggest_next_trials()

    cand_name = 'Candidate' if self._N == 1 else 'Candidates'
    mname = self.ax_client.objective_names[0] if metricname is None else metricname
    param_name = [name for name in self.names if name not in slice_values.keys()]
    par_numeric = [name for name in param_name
                   if self._features[name]['type'] in ['int', 'float']]

    if self.model is None:
        # No surrogate model yet: fall back to plotting the raw observations.
        completed_trials = self.ax_client.get_trials_data_frame()
        completed_trials = completed_trials[completed_trials['trial_status'] != 'CANDIDATE'].copy()
        if mname not in completed_trials.columns:
            return go.Figure()
        if len(par_numeric) == 0:
            return go.Figure()
        if len(par_numeric) == 1:
            fig = px.scatter(
                completed_trials,
                x=par_numeric[0],
                y=mname,
                title=f"Observed {mname} vs {par_numeric[0]}",
            )
        elif len(par_numeric) == 2:
            fig = px.scatter(
                completed_trials,
                x=par_numeric[0],
                y=par_numeric[1],
                color=mname,
                color_continuous_scale="Viridis",
                title=f"Observed {par_numeric[1]} vs {par_numeric[0]}",
            )
        else:
            fig = px.scatter_matrix(
                completed_trials,
                dimensions=par_numeric + [mname],
                title=f"Observed relationships for {mname}",
            )
        return fig

    if len(par_numeric) == 1:
        fig = plot_slice(
            model=self.model,
            metric_name=mname,
            density=100,
            param_name=par_numeric[0],
            generator_runs_dict={cand_name: self.generator_run},
            slice_values=slice_values
        )
    elif len(par_numeric) == 2:
        fig = plot_contour(
            model=self.model,
            metric_name=mname,
            param_x=par_numeric[0],
            param_y=par_numeric[1],
            generator_runs_dict={cand_name: self.generator_run},
            slice_values=slice_values
        )
    else:
        # remove sliced parameters from par_numeric
        pars = [p for p in par_numeric if p not in slice_values.keys()]
        fig = interact_contour(
            model=self.model,
            generator_runs_dict={cand_name: self.generator_run},
            metric_name=mname,
            slice_values=slice_values,
            parameters_to_use=pars
        )

    plotly_fig = go.Figure(fig.data)

    all_trials = self.ax_client.get_trials_data_frame()
    completed_trials = all_trials[all_trials['trial_status'] != 'CANDIDATE'].copy()

    # Signed distance of every completed trial to the requested slice:
    # the sum of (value - slice value) over the sliced parameters.
    col_to_consider = completed_trials[[k for k in slice_values.keys()]]
    completed_trials.loc[:, 'signed_dist_to_slice'] = (
        (col_to_consider - slice_values).sum(axis=1)
    )
    signed_dists = completed_trials['signed_dist_to_slice'].values
    positive_dists = signed_dists[signed_dists >= 0]
    negative_dists = signed_dists[signed_dists < 0]

    # Normalize positive distances to [0, 1]
    if len(positive_dists) > 0 and np.max(positive_dists) > 0:
        normalized_positive = positive_dists / np.max(positive_dists)
    else:
        normalized_positive = np.zeros_like(positive_dists)

    # Normalize negative distances to [-1, 0]
    if len(negative_dists) > 0 and np.min(negative_dists) < 0:
        normalized_negative = negative_dists / np.abs(np.min(negative_dists))
    else:
        normalized_negative = np.zeros_like(negative_dists)

    # Combine the normalized distances
    normalized_signed_dists = np.zeros_like(signed_dists)
    normalized_signed_dists[signed_dists >= 0] = normalized_positive
    normalized_signed_dists[signed_dists < 0] = normalized_negative
    completed_trials.loc[:, 'normalized_signed_dist'] = normalized_signed_dists

    # Colour each completed trial by its normalized distance (blue-white-red).
    coolwarm = mpl.colormaps['bwr']
    normalized_values = (completed_trials['normalized_signed_dist'] + 1) / 2  # Map from [-1,1] to [0,1]
    colors = [
        f"rgb({int(r*255)}, {int(g*255)}, {int(b*255)})"
        for r, g, b, _ in coolwarm(normalized_values)
    ]
    completed_trials.loc[:, 'colors'] = colors

    # Parameters of the current candidate trials, used for their hover text.
    trials = self.ax_client.get_trials_data_frame()
    trials = trials[trials['trial_status'] == 'CANDIDATE']
    trials = trials[[name for name in self.names]]

    for trace in plotly_fig.data:
        if trace.type == "contour":
            trace.colorscale = "viridis"
        if 'marker' in trace and trace.legendgroup != cand_name:
            # Observed samples: recover the arm names from the hover text so
            # each marker can be given the colour of its trial.
            arm_names = []
            if trace['text']:
                for text in trace['text']:
                    match = re.search(r'Arm (\d+_\d+)', text)
                    if match:
                        arm_names.append(match.group(1))
            arm_to_color = dict(zip(completed_trials['arm_name'], completed_trials['colors']))
            trace.marker.color = [arm_to_color[arm] for arm in arm_names]
            trace.marker.symbol = "circle"
            trace.marker.size = 10
            trace.marker.line.width = 2
            trace.marker.line.color = 'black'
            if trace.text is not None:
                trace.text = [t.replace('Arm', '<b>Sample').replace("_0","</b>") for t in trace.text]
        if trace.legendgroup == cand_name:
            trace.marker.line.color = 'red'
            trace.marker.color = "orange"
            trace.name = cand_name
            trace.marker.symbol = "x"
            trace.marker.size = 12
            trace.marker.opacity = 1
            trace.hoverinfo = "text"
            trace.hoverlabel = dict(bgcolor="#f8e3cd", font_color='black')
            if trace.text is not None:
                # One hover label per candidate, listing its parameters.
                candidate_labels = []
                for i in range(len(trials)):
                    details = '<br>'.join(
                        f'{col}: {val}' for col, val in trials.iloc[i].items()
                    )
                    candidate_labels.append(f"<b>Candidate {i+1}</b><br>{details}")
                trace.text = candidate_labels

    # Same framed, light-grid style for both pairs of axes.
    axis_style = dict(
        showgrid=True,
        gridcolor="lightgray",
        zeroline=False,
        zerolinecolor="black",
        showline=True,
        linewidth=1,
        linecolor="black",
        mirror=True
    )
    plotly_fig.update_layout(
        plot_bgcolor="white",
        legend=dict(bgcolor='rgba(0,0,0,0)'),
        margin=dict(l=10, r=10, t=50, b=50),
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        xaxis2=dict(axis_style),
        yaxis2=dict(axis_style),
    )
    return plotly_fig
def plot_optimization_trace(self, optimum=None):
    """
    Plot the optimization trace, i.e. the progress of the objective over trials.

    Parameters
    ----------
    optimum : Optional[float]
        Known optimal value, forwarded to Ax as ``objective_optimum``.

    Returns
    -------
    plotly.graph_objects.Figure:
        Plotly figure of the optimization trace, or ``None`` (after printing
        a notice) when the experiment has more than one outcome.
    """
    if self.ax_client is None:
        self.initialize_ax_client()

    if len(self._outcomes) > 1:
        print("Optimization trace is not available for multi-objective optimization.")
        return None

    ax_plot = self.ax_client.get_optimization_trace(objective_optimum=optimum)
    trace_fig = go.Figure(ax_plot.data)
    for curve in trace_fig.data:
        # add hover info
        curve.hoverinfo = "x+y"

    # Framed axes with a light gray grid, shared by x and y.
    framed_axis = dict(
        showgrid=True,
        gridcolor="lightgray",
        zeroline=False,
        zerolinecolor="black",
        showline=True,
        linewidth=1,
        linecolor="black",
        mirror=True
    )
    trace_fig.update_layout(
        plot_bgcolor="white",
        legend=dict(bgcolor='rgba(0,0,0,0)'),
        margin=dict(l=50, r=10, t=50, b=50),
        xaxis=dict(framed_axis),
        yaxis=dict(framed_axis),
    )
    return trace_fig
def plot_feature_importances(self, relative=False):
    """
    Plot feature importances using Ax Sensitivity Analysis cards.

    One sensitivity figure is computed per objective. With a single figure it
    is returned directly; with several, their traces are merged into one
    figure with a dropdown that switches between objectives (the first
    objective is shown initially). If no card can be computed, the legacy Ax
    feature-importance helper is tried instead.

    Parameters
    ----------
    relative : bool, optional
        Used only by the fallback Ax helper plot if analysis cards are
        unavailable. Default is False.

    Returns
    -------
    plotly.graph_objects.Figure:
        Plotly figure of feature importances, or ``None`` when there are no
        objectives or no figure could be produced.
    """
    if self.ax_client is None:
        self.initialize_ax_client()
    if self.model is None:
        self.set_model()

    def _style_sensitivity_figure(fig):
        # Apply the colours, bar layout and readable labels to a figure.
        def _humanize_sensitivity_label(label):
            # Turn "<feature>_OH_PARAM_<i>" into "<feature>: <i-th range value>".
            if not isinstance(label, str) or "_OH_PARAM_" not in label:
                return label
            match = re.match(r"^(?P<feature>.+)_OH_PARAM_(?P<index>\d+)$", label)
            if match is None:
                return label
            feature_name = match.group("feature")
            category_index = int(match.group("index"))
            feature_info = self._features.get(feature_name)
            if feature_info is None:
                return label
            category_values = feature_info.get("range", [])
            if not isinstance(category_values, list) or category_index >= len(category_values):
                return feature_name
            return f"{feature_name}: {category_values[category_index]}"

        for trace in fig.data:
            trace_name = str(getattr(trace, "name", "") or "")
            if "Increases" in trace_name:
                trace.marker.color = "#2ca02c"
            elif "Decreases" in trace_name:
                trace.marker.color = "#d62728"
            # Keep bars centered on category rows for cleaner label alignment.
            if getattr(trace, "type", None) == "bar":
                trace.width = 0.85
                trace.offsetgroup = None
                trace.alignmentgroup = None
            y_values = getattr(trace, "y", None)
            if y_values is not None:
                trace.y = [_humanize_sensitivity_label(value) for value in y_values]
            hovertemplate = getattr(trace, "hovertemplate", None)
            if hovertemplate is not None:
                hovertemplate = hovertemplate.replace("truncated_parameter_name=%{y}<br>", "")
                hovertemplate = hovertemplate.replace("truncated_parameter_name=%{y}<br />", "")
                trace.hovertemplate = hovertemplate
        fig.update_layout(barmode="overlay")
        fig.update_yaxes(title_text="")
        fig.update_xaxes(title_text="Importance")
        return fig

    handled_errors = (AttributeError, RuntimeError, ValueError, TypeError, KeyError)

    metric_names = list(self.ax_client.objective_names)
    if len(metric_names) == 0:
        return None

    figures = []
    labels = []
    for metric_name in metric_names:
        try:
            card = SensitivityAnalysisPlot(metric_name=metric_name).compute(
                experiment=self.ax_client.experiment,
                generation_strategy=self.gs,
                adapter=self.model,
            )
            figures.append(_style_sensitivity_figure(card.get_figure()))
            labels.append(metric_name)
        except handled_errors:
            # Best effort: skip metrics whose card cannot be computed.
            continue

    if len(figures) == 0:
        # Fallback to legacy Ax plot helper if analysis cards are unavailable.
        try:
            fig = plot_feature_importance_by_feature_plotly(
                model=self.model,
                relative=relative,
            )
            return _style_sensitivity_figure(fig)
        except handled_errors:
            return None

    if len(figures) == 1:
        return _style_sensitivity_figure(figures[0])

    # Several objectives: stack all traces in one figure and remember which
    # index range belongs to which objective.
    merged = go.Figure()
    trace_blocks = []
    for fig in figures:
        start = len(merged.data)
        for tr in fig.data:
            merged.add_trace(tr)
        trace_blocks.append((start, len(merged.data)))

    # Only the first objective's traces are visible initially; the dropdown
    # buttons below switch to the others.
    first_start, first_end = trace_blocks[0]
    for j, tr in enumerate(merged.data):
        tr.visible = first_start <= j < first_end

    buttons = []
    for i, metric_name in enumerate(labels):
        vis = [False] * len(merged.data)
        start, end = trace_blocks[i]
        for j in range(start, end):
            vis[j] = True
        buttons.append(
            {
                "label": metric_name,
                "method": "update",
                "args": [
                    {"visible": vis},
                    {"title": f"Sensitivity Analysis for {metric_name}"},
                ],
            }
        )

    merged.update_layout(figures[0].layout)
    merged.update_layout(
        updatemenus=[
            {
                "x": 1.0,
                "xanchor": "right",
                "y": 1.15,
                "yanchor": "top",
                "buttons": buttons,
            }
        ]
    )
    return _style_sensitivity_figure(merged)
def compute_pareto_frontier(self):
    """
    Compute the Pareto frontier for multi-objective optimization experiments.

    With exactly two metrics the posterior Pareto frontier is computed with
    20 points; otherwise the Pareto-optimal parameters reported by the Ax
    client are used. The result is stored in ``self.pareto_frontier``.

    Returns
    -------
    The Pareto frontier, or ``None`` (after printing a notice) when the
    experiment has fewer than two outcomes.
    """
    if self.ax_client is None:
        self.initialize_ax_client()

    if len(self._outcomes) < 2:
        print("Pareto frontier is not available for single-objective optimization.")
        return None

    if self.Nmetrics != 2:
        # For 3+ objectives, keep Pareto-optimal points and visualize in plot_pareto_frontier.
        self.pareto_frontier = self.ax_client.get_pareto_optimal_parameters()
        return self.pareto_frontier

    objectives = self.ax_client.experiment.optimization_config.objective.objectives
    frontier = compute_posterior_pareto_frontier(
        experiment=self.ax_client.experiment,
        data=self.ax_client.experiment.fetch_data(),
        primary_objective=objectives[1].metric,
        secondary_objective=objectives[0].metric,
        absolute_metrics=[objective.metric.name for objective in objectives],
        num_points=20,
    )
    self.pareto_frontier = frontier
    return self.pareto_frontier
def plot_pareto_frontier(self, show_error_bars=True):
    """
    Plot the Pareto frontier for multi-objective optimization experiments.

    With more than two metrics, the Pareto-optimal points are shown as a
    scatter matrix of the objectives; otherwise the Ax Pareto-frontier plot
    is used.

    Parameters
    ----------
    show_error_bars : bool, optional
        Whether to show error bars on the plot. Default is True.

    Returns
    -------
    plotly.graph_objects.Figure:
        Plotly figure of the Pareto frontier, or ``None`` when no frontier
        has been computed or fewer than two objectives are available.
    """
    frontier = self.pareto_frontier
    if frontier is None:
        return None

    if self.Nmetrics > 2:
        df = ordered_dict_to_dataframe(frontier)
        objective_names = [
            name for name in self.ax_client.objective_names if name in df.columns
        ]
        if len(objective_names) < 2:
            return None
        hover_columns = [name for name in self.names if name in df.columns]
        fig = px.scatter_matrix(
            df,
            dimensions=objective_names,
            hover_data=hover_columns,
            title="Pareto-optimal objective trade-offs",
        )
        fig.update_traces(diagonal_visible=False)
    else:
        ax_plot = plot_pareto_frontier(frontier)
        fig = go.Figure(ax_plot.data)

    if not show_error_bars:
        # Drop whichever error bars are set on each trace.
        for trace in fig.data:
            for attribute in ('error_x', 'error_y'):
                if getattr(trace, attribute, None) is not None:
                    setattr(trace, attribute, None)

    # Framed axes with a light gray grid, shared by x and y.
    framed_axis = dict(
        showgrid=True,
        gridcolor="lightgray",
        zeroline=False,
        zerolinecolor="black",
        showline=True,
        linewidth=1,
        linecolor="black",
        mirror=True
    )
    fig.update_layout(
        plot_bgcolor="white",
        legend=dict(bgcolor='rgba(0,0,0,0)'),
        margin=dict(l=50, r=10, t=50, b=50),
        xaxis=dict(framed_axis),
        yaxis=dict(framed_axis),
    )
    return fig
def get_best_parameters(self):
    """
    Return the best parameters found by the optimization process.

    Without a model, or when the Ax client has nothing to report, the best
    observed rows from ``self._get_observed_best_parameters()`` are returned
    instead.

    Returns
    -------
    pd.DataFrame:
        DataFrame containing the best parameters and their outcomes (one row
        for a single metric, the Pareto-optimal points otherwise).
    """
    if self.ax_client is None:
        self.initialize_ax_client()
    if self.model is None:
        return self._get_observed_best_parameters()

    if self.Nmetrics != 1:
        pareto_points = self.ax_client.get_pareto_optimal_parameters()
        if pareto_points is None:
            return self._get_observed_best_parameters()
        return ordered_dict_to_dataframe(pareto_points)

    result = self.ax_client.get_best_parameters()
    if result is None or result[0] is None or result[1] is None:
        return self._get_observed_best_parameters()

    parameters, outcomes = result[0], result[1]
    # Merge the predicted outcome means into the parameter dictionary.
    parameters.update(outcomes[0])
    return pd.DataFrame(parameters, index=[0])
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def flatten_dict(d, parent_key="", sep="_"):
    """
    Flatten a nested dictionary.

    Keys of nested dictionaries are joined to their parent key with ``sep``;
    top-level keys are kept unchanged.
    """
    flat = {}
    for key, value in d.items():
        full_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            # Recurse, carrying the accumulated key as the new prefix.
            flat.update(flatten_dict(value, full_key, sep=sep))
        else:
            flat[full_key] = value
    return flat
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def ordered_dict_to_dataframe(data):
    """
    Convert an OrderedDict with arbitrary nesting to a DataFrame.

    After flattening, every value is read as ``(parameters, (outcomes, ...))``
    and becomes one row made of the parameter values followed by the outcome
    values. Column names are taken from the keys of the last entry.

    Returns an empty DataFrame when ``data`` contains no entries.
    """
    flat = flatten_dict(data)
    if not flat:
        # Nothing to tabulate: there is no entry to take the column names from.
        return pd.DataFrame()

    rows = []
    for entry in flat.values():
        parameters = entry[0]
        outcomes = entry[1][0]
        rows.append(list(parameters.values()) + list(outcomes.values()))

    columns = list(parameters.keys()) + list(outcomes.keys())
    return pd.DataFrame(rows, columns=columns)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
def read_experimental_data(file_path: str, out_pos=None) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
    """
    Read experimental data from a CSV file and format it into features and outcomes dictionaries.

    Parameters
    ----------
    file_path (str)
        Path to the CSV file containing experimental data.
    out_pos (int or list of int)
        Column index or indices of the outcome variables. Default (``None``)
        is the last column.

    Returns
    -------
    Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]
        Formatted features and outcomes dictionaries. Each feature has
        ``'type'``, ``'data'`` and ``'range'`` (unique values for text,
        ``[min, max]`` for numbers); each outcome has ``'type'`` and
        ``'data'``. Columns that are neither text, ``int64`` nor ``float64``
        are left out.
    """
    if out_pos is None:
        out_pos = [-1]
    elif isinstance(out_pos, (int, np.integer)):
        # A bare index selects a single outcome column.
        out_pos = [int(out_pos)]

    def _column_kind(series):
        # Map a column dtype to 'text', 'int' or 'float'; None if unsupported.
        dtype = series.dtype
        # Text may be stored as plain objects or with pandas' string dtype.
        if dtype == 'object' or isinstance(dtype, pd.StringDtype):
            return 'text'
        if dtype == 'int64':
            return 'int'
        if dtype == 'float64':
            return 'float'
        return None

    data = pd.read_csv(file_path)
    data = clean_names(data, remove_special=True, case_type='preserve')

    outcome_column_name = data.columns[out_pos]
    features = data.loc[:, ~data.columns.isin(outcome_column_name)].copy()
    outcomes = data[outcome_column_name].copy()

    formatted_features = {}
    for column in features.columns:
        kind = _column_kind(features[column])
        if kind is None:
            continue
        if kind == 'text':
            value_range = features[column].unique().tolist()
        else:
            value_range = [features[column].min(), features[column].max()]
        formatted_features[column] = {
            'type': kind,
            'data': features[column].tolist(),
            'range': value_range,
        }

    formatted_outcomes = {}
    for column in outcomes.columns:
        kind = _column_kind(outcomes[column])
        if kind is None:
            continue
        formatted_outcomes[column] = {
            'type': kind,
            'data': outcomes[column].tolist(),
        }

    return formatted_features, formatted_outcomes