# Source code for optimeo.bo

# Copyright (c) 2025 Colin BOUSIGE
# Contact: colin.bousige@cnrs.fr
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the MIT License as published by
# the Free Software Foundation, either version 3 of the License, or
# any later version.

"""
This module provides a class for optimizing experiments using Bayesian Optimization (BO) with the [Ax platform](https://ax.dev/).
It includes methods for initializing the experiment, suggesting trials, predicting outcomes, and plotting results.

You can see an example notebook [here](../examples/bo.ipynb).

"""

import matplotlib as mpl
import re
import plotly.express as px
import plotly.graph_objects as go
from botorch.acquisition.analytic import *
from ax.service.ax_client import AxClient, ObjectiveProperties
from ax.plot.slice import plot_slice
from ax.plot.pareto_utils import compute_posterior_pareto_frontier
from ax.plot.pareto_frontier import plot_pareto_frontier
from ax.plot.feature_importances import plot_feature_importance_by_feature_plotly
from ax.plot.contour import interact_contour, plot_contour
from ax.analysis.plotly.sensitivity import SensitivityAnalysisPlot
from ax.generation_strategy.generation_strategy import GenerationStrategy
from ax.generation_strategy.generation_node import GenerationStep
from ax.exceptions.core import DataRequiredError
from ax.core.trial_status import TrialStatus
from ax.core.observation import ObservationFeatures
from ax.adapter.registry import Generators
from typing import Any, Dict, List, Optional, Union, Tuple
from janitor import clean_names
import random
import pandas as pd
import numpy as np
import warnings
# Globally ignore these warning categories for the whole process.
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=DeprecationWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
# RuntimeError is an exception class, not a warning category, so a filter on
# it can never match an emitted warning; RuntimeWarning is the category meant.
warnings.simplefilter(action='ignore', category=RuntimeWarning)


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



class BOExperiment:
    """
    BOExperiment is a class designed to facilitate Bayesian Optimization experiments using the [Ax platform](https://ax.dev/).
    It encapsulates the experiment setup, including features, outcomes, constraints, and optimization methods.

    Example
    -------

    .. code-block:: python

        from optimeo.bo import BOExperiment, read_experimental_data

        features, outcomes = read_experimental_data('data.csv', out_pos=[-2, -1])
        experiment = BOExperiment(
            features,
            outcomes,
            N=5,
            maximize={'out1': True, 'out2': False},
        )
        experiment.suggest_next_trials()
        experiment.plot_model(metricname='out1')
        experiment.plot_optimization_trace()

    Parameters
    ----------
    features: Dict[str, Dict[str, Any]]
        A dictionary defining the features of the experiment, including their types and ranges.
        Each feature is represented as a dictionary with keys 'type', 'data', and 'range'.
        - 'type': The type of the feature (e.g., 'int', 'float', 'text').
        - 'data': The observed data for the feature.
        - 'range': The range of values for the feature.
    outcomes: Dict[str, Dict[str, Any]]
        A dictionary defining the outcomes of the experiment, including their types and observed data.
        Each outcome is represented as a dictionary with keys 'type' and 'data'.
        - 'type': The type of the outcome (e.g., 'int', 'float').
        - 'data': The observed data for the outcome.
    ranges: Optional[Dict[str, Dict[str, Any]]]
        A dictionary defining the ranges of the features. Default is `None`.
        If not provided, the ranges will be inferred from the features data.
        The ranges should be in the format `{'feature_name': [minvalue,maxvalue]}`.
    N: int
        The number of trials to suggest in each optimization step. Must be a positive integer.
    maximize: Union[bool, Dict[str, bool]]
        A boolean or dict indicating whether to maximize the outcomes in the form `{'outcome1':True, 'outcome2':False}`.
        If a single boolean is provided, it is applied to all outcomes. Default is `True`.
    fixed_features: Optional[Dict[str, Any]]
        A dictionary defining fixed features with their values. Default is `None`.
        If provided, the fixed features will be treated as fixed parameters in the generation process.
        The fixed features should be in the format `{'feature_name': value}`.
        The values should be the fixed values for the respective features.
    outcome_constraints: Optional[List[str]]
        Constraints on the outcomes, specified as a list of strings. Default is `None`.
        Each constraint should be a string such as `'outcome1 >= 0.5'`.
    objective_thresholds: Optional[Dict[str, float]]
        Reference-point thresholds for multi-objective optimization, specified per outcome
        in the format ``{'outcome_name': threshold}``. Default is `None`.
        Setting these avoids Ax's default winsorization warning for multi-objective runs.
    feature_constraints: Optional[List[str]]
        Constraints on the features, specified as a list of strings. Default is `None`.
        Each constraint should be a string such as `'feature1 + 2*feature2 >= 3.0'`.
    optim: str
        The optimization method to use, either 'bo' for Bayesian Optimization or 'sobol' for Sobol sequence. Default is 'bo'.
    acq_func: Optional[Dict[str, Any]]
        The acquisition function to use for the optimization process. It must be a dict with 2 keys:
        - `acqf`: the acquisition function class to use (e.g., `UpperConfidenceBound`),
        - `acqf_kwargs`: a dict of the kwargs to pass to the acquisition function class. (e.g. `{'beta': 0.1}`).

        If not provided, the default acquisition function is used (`LogExpectedImprovement` or `qLogExpectedImprovement` if N>1).

    Attributes
    ----------

    features: Dict[str, Dict[str, Any]]
        A dictionary defining the features of the experiment, including their types and ranges.
    outcomes: Dict[str, Dict[str, Any]]
        A dictionary defining the outcomes of the experiment, including their types and observed data.
    N: int
        The number of trials to suggest in each optimization step. Must be a positive integer.
    maximize: Dict[str, bool]
        A dictionary mapping each outcome name to whether it should be maximized.
        A single boolean passed at construction is expanded to all outcomes.
    outcome_constraints: Optional[List[str]]
        Constraints on the outcomes, stored as a list of strings (or `None`).
    feature_constraints: Optional[List[str]]
        Constraints on the features, stored as a list of strings (or `None`).
    optim: str
        The optimization method to use, either 'bo' for Bayesian Optimization or 'sobol' for Sobol sequence.
    data: pd.DataFrame
        A DataFrame representing the current data in the experiment, including features and outcomes.
    acq_func: dict
        The acquisition function to use for the optimization process. 
    generator_run:
        The generator run for the experiment, used to generate new candidates.
    model:
        The model used for predictions in the experiment.
    ax_client:
        The AxClient for the experiment, used to manage trials and data.
    gs:
        The generation strategy for the experiment, used to generate new candidates.
    parameters:
        The parameters for the experiment, including their types and ranges.
    names:
        The names of the features in the experiment.
    fixed_features:
        The fixed features for the experiment, used to generate new candidates.
    candidate:
        The candidate(s) suggested by the optimization process.


    Methods
    -------
    initialize_ax_client()
        Initialize AxClient with experiment parameters, objectives, and constraints.
    suggest_next_trials()
        Suggest next trial(s) from the current model and generation strategy.
    predict(params)
        Predict outcomes for a list of parameter dictionaries.
    update_experiment(params, outcomes)
        Update the experiment with new observations and refresh internal state.
    plot_model(metricname=None, slice_values=None, linear=False)
        Plot model predictions as slices or contours.
    plot_optimization_trace(optimum=None)
        Plot optimization progress over trials.
    plot_pareto_frontier()
        Plot Pareto frontier for multi-objective problems.
    get_best_parameters()
        Return best parameter set(s) and associated outcomes.
    clear_trials()
        Remove all current trials.

    """

    def __init__(self,
                 features: Dict[str, Dict[str, Any]],
                 outcomes: Dict[str, Dict[str, Any]],
                 ranges: Optional[Dict[str, Dict[str, Any]]] = None,
                 N: int = 1,
                 maximize: Union[bool, Dict[str, bool]] = True,
                 fixed_features: Optional[Dict[str, Any]] = None,
                 outcome_constraints: Optional[List[str]] = None,
                 objective_thresholds: Optional[Dict[str, float]] = None,
                 feature_constraints: Optional[List[str]] = None,
                 optim: str = 'bo',
                 acq_func: Optional[Dict[str, Any]] = None,
                 seed: int = 42) -> None:
        """
        Store the experiment settings through their property setters, then build the Ax client once.

        The assignment order below matters because several setters read state
        written by earlier ones (see the inline comments).
        """
        # While this flag is False the property setters only store their value;
        # they do not rebuild the Ax client or the generation strategy.
        self._first_initialization_done = False
        # `ranges` must be set before `features`: the features setter reads self.ranges.
        self.ranges = ranges
        self.features = features
        # `names` must be set before `fixed_features`: that setter validates against self.names.
        self.names = list(self._features.keys())
        self.fixed_features = fixed_features
        # `outcomes` defines self.out_names, which the `maximize` and
        # `objective_thresholds` setters read.
        self.outcomes = outcomes
        self.N = N
        self.maximize = maximize
        self.outcome_constraints = outcome_constraints
        self.objective_thresholds = objective_thresholds
        self.feature_constraints = feature_constraints
        self.optim = optim
        self.acq_func = acq_func
        # The seed setter also seeds `random` and `numpy.random`.
        self.seed = seed
        self.candidate = None
        """The candidate(s) suggested by the optimization process."""
        self.ax_client = None
        """Ax's client for the experiment."""
        self.model = None
        """Ax's Gaussian Process model."""
        self.parameters = None
        """Ax's parameters for the experiment."""
        self.generator_run = None
        """Ax's generator run for the experiment."""
        self.gs = None
        """Ax's generation strategy for the experiment."""
        # Single explicit build, now that every setting is in place.
        self.initialize_ax_client()
        self.Nmetrics = len(self.ax_client.objective_names)
        """The number of metrics in the experiment."""
        # From here on, property setters trigger a rebuild themselves.
        self._first_initialization_done = True
        """To indicate that the first initialization is done so that we don't call `initialize_ax_client()` again."""
        self.pareto_frontier = None
        """The Pareto frontier for multi-objective optimization experiments."""

    @property
    def seed(self) -> int:
        """Random seed for reproducibility. Default is 42."""
        return self._seed

    @seed.setter
    def seed(self, value: int):
        """Set the random seed."""
        if isinstance(value, int):
            self._seed = value
        else:
            raise Warning("Seed must be an integer. Using default seed 42.")
            self._seed = 42
        random.seed(self.seed)
        np.random.seed(self.seed)

    @property
    def features(self):
        """
        A dictionary defining the features of the experiment, including their types and ranges.

        Example
        -------
        .. code-block:: python

            features = {
                'feature1': {'type': 'int', 'data': [1, 2, 3], 'range': [1, 3]},
                'feature2': {'type': 'float', 'data': [0.1, 0.2, 0.3], 'range': [0.1, 0.3]},
                'feature3': {'type': 'text', 'data': ['A', 'B', 'C'], 'range': ['A', 'B', 'C']},
            }
        """
        return self._features

    @features.setter
    def features(self, value):
        """
        Set the features of the experiment with validation.
        """
        if not isinstance(value, dict):
            raise ValueError("features must be a dictionary")
        self._features = value
        for name in self._features.keys():
            if self.ranges and name in self.ranges.keys():
                self._features[name]['range'] = self.ranges[name]
            else:
                feature_data = self._features[name].get('data', [])
                if len(feature_data) == 0:
                    self._features[name]['range'] = self._features[name].get(
                        'range', [])
                elif self._features[name]['type'] == 'text':
                    self._features[name]['range'] = list(set(feature_data))
                elif self._features[name]['type'] == 'int':
                    self._features[name]['range'] = [int(np.min(self._features[name]['data'])),
                                                     int(np.max(self._features[name]['data']))]
                elif self._features[name]['type'] == 'float':
                    self._features[name]['range'] = [float(np.min(self._features[name]['data'])),
                                                     float(np.max(self._features[name]['data']))]
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def ranges(self):
        """
        A dictionary defining the ranges of the features. Default is `None`.

        If not provided, the ranges will be inferred from the features data.
        The ranges should be in the format `{'feature_name': [minvalue,maxvalue]}`.
        """
        return self._ranges

    @ranges.setter
    def ranges(self, value):
        """
        Set the ranges of the features with validation.
        """
        if value is not None:
            if not isinstance(value, dict):
                raise ValueError("ranges must be a dictionary")
        self._ranges = value

    @property
    def names(self):
        """
        The names of the features.
        """
        return self._names

    @names.setter
    def names(self, value):
        """
        Set the names of the features.
        """
        if not isinstance(value, list):
            raise ValueError("names must be a list")
        self._names = value

    @property
    def outcomes(self):
        """
        A dictionary defining the outcomes of the experiment, including their types and observed data.

        Example
        -------
        .. code-block:: python

            outcomes = {
                'outcome1': {'type': 'float', 'data': [0.1, 0.2, 0.3]},
                'outcome2': {'type': 'float', 'data': [1.0, 2.0, 3.0]},
            }
        """
        return self._outcomes

    @outcomes.setter
    def outcomes(self, value):
        """
        Set the outcomes of the experiment with validation.
        """
        if not isinstance(value, dict):
            raise ValueError("outcomes must be a dictionary")
        self._outcomes = value
        self.out_names = list(value.keys())
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def fixed_features(self):
        """
        A dictionary defining fixed features with their values. Default is `None`.
        If provided, the fixed features will be treated as fixed parameters in the generation process.
        The fixed features should be in the format `{'feature_name': value}`.
        The values should be the fixed values for the respective features.
        """
        return self._fixed_features

    @fixed_features.setter
    def fixed_features(self, value):
        """
        Set the fixed features of the experiment.
        """
        self._fixed_features = None
        if value is not None:
            if not isinstance(value, dict):
                raise ValueError("fixed_features must be a dictionary")
            for name in value.keys():
                if name not in self.names:
                    raise ValueError(
                        f"Fixed feature '{name}' not found in features")
            # fixed_features should be an ObservationFeatures object
            self._fixed_features = ObservationFeatures(parameters=value)
        if self._first_initialization_done:
            self.set_gs()

    @property
    def N(self):
        """
        The number of trials to suggest in each optimization step. Must be a positive integer. Default is `1`.
        """
        return self._N

    @N.setter
    def N(self, value):
        """
        Set the number of trials to suggest in each optimization step with validation.
        """
        if not isinstance(value, int) or value <= 0:
            raise ValueError("N must be a positive integer")
        self._N = value
        if self._first_initialization_done:
            self.set_gs()

    @property
    def maximize(self):
        """
        A boolean or dict indicating whether to maximize outcomes in the form ``{'outcome1': True, 'outcome2': False}``.
        If a single boolean is provided, it is applied to all outcomes. Default is ``True``.
        """
        return self._maximize

    @maximize.setter
    def maximize(self, value):
        """
        Set the maximization setting for the outcomes with validation.
        """
        if isinstance(value, bool):
            self._maximize = {out: value for out in self.out_names}
        elif isinstance(value, dict) and len(value) == len(self._outcomes):
            self._maximize = {k: v for k, v in value.items() if
                              (k in self.out_names and isinstance(v, bool))}
        else:
            raise ValueError(
                "maximize must be a boolean or a list of booleans with the same length as outcomes")
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def outcome_constraints(self):
        """
        Constraints on the outcomes, specified as a list of strings. Default is `None`.
        """
        return self._outcome_constraints

    @outcome_constraints.setter
    def outcome_constraints(self, value):
        """
        Set the outcome constraints of the experiment with validation.
        """
        if isinstance(value, str):
            self._outcome_constraints = [value]
        elif isinstance(value, list):
            self._outcome_constraints = value
        else:
            self._outcome_constraints = None
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def objective_thresholds(self):
        """
        Reference-point thresholds for multi-objective optimization.

        Format: ``{'outcome_name': threshold}``.
        """
        return self._objective_thresholds

    @objective_thresholds.setter
    def objective_thresholds(self, value):
        """
        Set objective thresholds with validation.
        """
        if value is None:
            self._objective_thresholds = None
        elif isinstance(value, dict):
            validated = {}
            for name, threshold in value.items():
                if name not in self.out_names:
                    raise ValueError(
                        f"Objective threshold provided for unknown outcome '{name}'")
                if not isinstance(threshold, (int, float)):
                    raise ValueError(
                        f"Objective threshold for '{name}' must be a number")
                validated[name] = float(threshold)
            self._objective_thresholds = validated
        else:
            raise ValueError(
                "objective_thresholds must be a dictionary or None")
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def feature_constraints(self):
        """
        Constraints on the features, specified as a list of strings. Default is `None`.

        Example
        -------
        .. code-block:: python

            feature_constraints = [
                'feature1 <= 10.0',
                'feature1 + 2*feature2 >= 3.0',
            ]
        """
        return self._feature_constraints

    @feature_constraints.setter
    def feature_constraints(self, value):
        """
        Set the feature constraints of the experiment with validation.
        """
        if isinstance(value, dict):
            self._feature_constraints = [value]
        elif isinstance(value, list):
            self._feature_constraints = value
        elif isinstance(value, str):
            self._feature_constraints = [value]
        else:
            self._feature_constraints = None
        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def optim(self):
        """
        The optimization method to use, either `'bo'` for Bayesian Optimization or `'sobol'` for Sobol sequence. Default is `'bo'`.
        """
        return self._optim

    @optim.setter
    def optim(self, value):
        """
        Set the optimization method with validation.
        """
        value = value.lower()
        if value not in ['bo', 'sobol']:
            raise ValueError(
                "Optimization method must be either 'bo' or 'sobol'")
        self._optim = value
        if self._first_initialization_done:
            self.set_gs()

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns a DataFrame of the current data in the experiment, including features and outcomes.
        """
        feature_data = {name: info['data']
                        for name, info in self._features.items()}
        outcome_data = {name: info['data']
                        for name, info in self._outcomes.items()}
        data_dict = {**feature_data, **outcome_data}
        return pd.DataFrame(data_dict)

    @data.setter
    def data(self, value: pd.DataFrame):
        """
        Sets the features and outcomes data from a given DataFrame.
        """
        if not isinstance(value, pd.DataFrame):
            raise ValueError("Data must be a pandas DataFrame")

        feature_columns = [
            col for col in value.columns if col in self._features]
        outcome_columns = [
            col for col in value.columns if col in self._outcomes]

        for col in feature_columns:
            self._features[col]['data'] = value[col].tolist()

        for col in outcome_columns:
            self._outcomes[col]['data'] = value[col].tolist()

        if self._first_initialization_done:
            self.initialize_ax_client()

    @property
    def pareto_frontier(self):
        """
        The Pareto frontier for multi-objective optimization experiments.
        """
        return self._pareto_frontier

    @pareto_frontier.setter
    def pareto_frontier(self, value):
        """
        Set the Pareto frontier of the experiment.
        """
        self._pareto_frontier = value

    @property
    def acq_func(self):
        """
        The acquisition function to use for the optimization process. It must be a dict with 2 keys:
        - `acqf`: the acquisition function class to use (e.g., `UpperConfidenceBound`),
        - `acqf_kwargs`: a dict of the kwargs to pass to the acquisition function class. (e.g. `{'beta': 0.1}`).

        If not provided, the default acquisition function is used (`LogExpectedImprovement` or `qLogExpectedImprovement` if N>1).

        Example
        -------
        .. code-block:: python

            acq_func = {
                'acqf': UpperConfidenceBound,
                'acqf_kwargs': {'beta': 0.1},  # lower = exploitation, higher = exploration
            }
        """
        return self._acq_func

    @acq_func.setter
    def acq_func(self, value):
        """
        Set the acquisition function with validation.
        """
        self._acq_func = value
        if self._first_initialization_done:
            self.set_gs()

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        """
        Return a string representation of the BOExperiment instance.
        """
        return f"""
BOExperiment(
    N={self.N},
    maximize={self.maximize},
    outcome_constraints={self.outcome_constraints},
    feature_constraints={self.feature_constraints},
    optim={self.optim}
)

Input data:

{self.data}
        """


    def initialize_ax_client(self):
        """
        Create a fresh AxClient and populate it from the stored settings.

        Steps: build the parameter definitions from the features, build the
        objectives from `maximize` (with optional thresholds), create the Ax
        experiment with the constraints, attach and complete one trial per
        existing observation, then refresh the model and generation strategy.
        """
        print('\n========   INITIALIZING MODEL   ========\n')
        self.ax_client = AxClient(verbose_logging=False,
                                  suppress_storage_errors=True)

        # One Ax parameter definition per feature: 'text' features become
        # string choices, 'int'/'float' features become numeric ranges.
        self.parameters = []
        for feature_name, spec in self._features.items():
            kind = spec['type']
            if kind == 'text':
                choices = [str(choice) for choice in spec['range']]
                self.parameters.append({
                    "name": feature_name,
                    "type": "choice",
                    "values": choices,
                    "value_type": "str",
                    "is_ordered": len(choices) == 2,
                    "sort_values": False})
            elif kind in ('int', 'float'):
                cast = int if kind == 'int' else float
                self.parameters.append({
                    "name": feature_name,
                    "type": "range",
                    "bounds": [cast(np.min(spec['range'])),
                               cast(np.max(spec['range']))],
                    "value_type": kind})

        # One objective per entry of `maximize` that names a known outcome.
        thresholds = self._objective_thresholds
        objectives = {}
        for outcome_name, do_maximize in self._maximize.items():
            if not isinstance(do_maximize, bool):
                continue
            if outcome_name not in self._outcomes.keys():
                continue
            threshold = None if thresholds is None else thresholds.get(
                outcome_name)
            objectives[outcome_name] = ObjectiveProperties(
                minimize=not do_maximize, threshold=threshold)

        self.ax_client.create_experiment(
            name="bayesian_optimization",
            parameters=self.parameters,
            objectives=objectives,
            parameter_constraints=self._feature_constraints,
            outcome_constraints=self._outcome_constraints,
            overwrite_existing_experiment=True
        )

        # The number of observations is read from the first outcome's data.
        n_observations = len(next(iter(self._outcomes.values()))['data'])
        for row in range(n_observations):
            params = {feature_name: spec['data'][row]
                      for feature_name, spec in self._features.items()}
            outcomes = {outcome_name: spec['data'][row]
                        for outcome_name, spec in self._outcomes.items()}
            self.ax_client.attach_trial(params)
            # The row number is used as the trial index of the trial just attached.
            self.ax_client.complete_trial(trial_index=row, raw_data=outcomes)

        self.set_model()
        self.set_gs()


    def _has_completed_data(self) -> bool:
        """Return whether the current Ax experiment has usable completed data."""
        if self.ax_client is None or self.ax_client.experiment is None:
            return False
        data = self.ax_client.experiment.fetch_data()
        if data is None:
            return False
        df = getattr(data, "df", None)
        return df is not None and not df.empty


    def set_model(self):
        """
        Set the model to be used for predictions.
        This method is called after initializing the AxClient.
        """
        if not self._has_completed_data():
            self.model = None
            return

        try:
            self.model = Generators.BOTORCH_MODULAR(
                experiment=self.ax_client.experiment,
                data=self.ax_client.experiment.fetch_data()
            )
        except DataRequiredError:
            self.model = None



    def set_gs(self):
        """
        Build the generation strategy and generate the next generator run.

        Candidate trials are abandoned first. With `optim == 'sobol'`, or with
        `optim == 'bo'` while no model could be fitted, a Sobol step is used.
        Otherwise a BoTorch step is used, configured with the custom
        acquisition function when `acq_func` is set.
        """
        self.clear_trials()

        def sobol_step():
            # Seeded, de-duplicated Sobol step with no limit on the number of trials.
            return GenerationStep(
                generator=Generators.SOBOL,
                num_trials=-1,
                should_deduplicate=True,
                model_kwargs={"seed": self.seed},
                model_gen_kwargs={},
            )

        step = None
        if self._optim == 'bo':
            if not self.model:
                self.set_model()
            if self.model is None:
                step = sobol_step()
            else:
                step_kwargs = {
                    "generator": Generators.BOTORCH_MODULAR,
                    "num_trials": -1,  # no limit on the number of trials from this step
                    "max_parallelism": 3,  # parallelism limit for this step
                }
                if self.acq_func is not None:
                    step_kwargs["model_kwargs"] = {
                        "seed": self.seed,
                        "botorch_acqf_class": self.acq_func['acqf'],
                        "botorch_acqf_options": self.acq_func['acqf_kwargs'],
                    }
                step = GenerationStep(**step_kwargs)
        elif self._optim == 'sobol':
            step = sobol_step()

        # Any other `_optim` value leaves the current strategy untouched.
        if step is not None:
            self.gs = GenerationStrategy(steps=[step])

        generated_runs = self.gs.gen(
            experiment=self.ax_client.experiment,  # experiment to generate candidates for
            data=None,  # optional training data
            n=self._N,  # number of candidate arms to produce
            fixed_features=self._fixed_features,
            pending_observations=None,
        )
        self.generator_run = generated_runs[0][0]



    def clear_trials(self):
        """
        Clear all trials in the experiment.
        """
        # Get all pending trial indices
        pending_trials = [k for k, i in self.ax_client.experiment.trials.items()
                          if i.status == TrialStatus.CANDIDATE]
        for i in pending_trials:
            self.ax_client.experiment.trials[i].mark_abandoned()



    def suggest_next_trials(self, with_predicted=True):
        """
        Suggest the next set of trials based on the current model and optimization strategy.

        The Ax client is initialized if needed, trials still in the candidate
        state are abandoned, and a new trial (``self._N == 1``) or batch trial
        is created from ``self.generator_run``.

        Parameters
        ----------

        with_predicted : bool, optional
            Whether to append the model predictions of each suggested trial as
            ``Predicted_<metric>`` columns. Has no effect when no model is
            available (``self.model is None``). Default is True.

        Returns
        -------

        pd.DataFrame:
            DataFrame containing the suggested trials and, when requested and
            available, their predicted outcomes.
        """
        # The client has to exist before its pending trials can be cleared.
        if self.ax_client is None:
            self.initialize_ax_client()
        self.clear_trials()

        experiment = self.ax_client.experiment
        if self._N == 1:
            self.candidate = experiment.new_trial(self.generator_run)
        else:
            self.candidate = experiment.new_batch_trial(self.generator_run)

        if hasattr(self.candidate, "arms"):
            arm_parameters = [arm.parameters for arm in self.candidate.arms]
        else:
            arm_parameters = [self.candidate.arm.parameters]

        # Keep the columns in the order of the declared feature names.
        trials = pd.DataFrame(arm_parameters)
        trials = trials[list(self.names)].reset_index(drop=True)
        if not with_predicted or self.model is None:
            return trials

        topred = [trials.iloc[i].to_dict() for i in range(len(trials))]
        # `predict` returns (means, standard errors); only the means are shown.
        preds = pd.DataFrame(self.predict(topred)[0])
        preds.columns = [f'Predicted_{col}' for col in preds.columns]
        preds = preds.reset_index(drop=True)
        return pd.concat([trials, preds], axis=1)


    def _get_observed_best_parameters(self):
        """Pick the best observed rows directly from the data when no fitted Ax model exists yet."""
        observed = self.data.copy()
        # Only entries of `_maximize` holding a boolean are treated as objectives.
        objectives = []
        for name, direction in self._maximize.items():
            if isinstance(direction, bool):
                objectives.append(name)
        if not objectives:
            return pd.DataFrame()

        # Rows lacking any objective value cannot be ranked.
        observed = observed.dropna(subset=objectives)
        if observed.empty:
            return pd.DataFrame(columns=self.data.columns)

        # Several objectives: no single best row, return all complete rows.
        if len(objectives) > 1:
            return observed.reset_index(drop=True)

        target = objectives[0]
        column = observed[target]
        best_index = column.idxmax() if self._maximize[target] else column.idxmin()
        return observed.loc[[best_index]].reset_index(drop=True)


[docs]
    def predict(self, params):
        """
        Predict the outcomes for a given set of parameters using the current model.

        Parameters
        ----------

        params : List[Dict[str, Any]]
            List of parameter dictionaries for which to predict outcomes.

        Returns
        -------

        Tuple[List[Dict[str, float]], List[Dict[str, float]]]:
            Two lists with one dictionary per entry of `params`, keyed by
            metric name: the predicted means, and the matching standard
            deviations (square root of the metric's own covariance entry).

        Raises
        ------

        ValueError
            If no model is available (``self.model is None``).
        """
        if self.ax_client is None:
            self.initialize_ax_client()
        if self.model is None:
            raise ValueError(
                "Predictions require at least one completed experiment with numeric outcome data.")
        features = [ObservationFeatures(parameters=p) for p in params]
        means, covariances = self.model.predict(features)
        metric_names = list(means.keys())
        preds = []
        stderrs = []
        for idx in range(len(features)):
            preds.append({m: means[m][idx] for m in metric_names})
            # Only the variance of each metric with itself is used.
            stderrs.append(
                {m: np.sqrt(covariances[m][m][idx]) for m in metric_names})
        return preds, stderrs



[docs]
    def update_experiment(self, params, outcomes):
        """
        Update the experiment with new parameters and outcomes, and reinitialize the AxClient.

        All names are validated before anything is appended, so a failing call
        leaves the stored features and outcomes untouched.

        Parameters
        ----------

        params : Dict[str, Any]
            Dictionary of new parameters to update the experiment with. Each
            value may be a scalar, a list or a numpy array.

        outcomes : Dict[str, Any]
            Dictionary of new outcomes to update the experiment with. Each
            value may be a scalar, a list or a numpy array.

        Raises
        ------

        ValueError
            If a key of `params` is not a known feature or a key of
            `outcomes` is not a known outcome.
        """
        def _as_list(values):
            # numpy arrays become lists; any other non-list value is wrapped.
            if isinstance(values, np.ndarray):
                values = values.tolist()
            return values if isinstance(values, list) else [values]

        # Validate everything up front to avoid a partially applied update.
        for name in params:
            if name not in self._features:
                raise ValueError(f"Parameter '{name}' not found in features")
        for name in outcomes:
            if name not in self._outcomes:
                raise ValueError(f"Outcome '{name}' not found in outcomes")

        # append new data to the features and outcomes dictionaries
        for name, values in params.items():
            self._features[name]['data'] += _as_list(values)
        for name, values in outcomes.items():
            self._outcomes[name]['data'] += _as_list(values)
        self.initialize_ax_client()



[docs]
    def plot_model(self, metricname=None, slice_values=None, linear=False):
        """
        Plot the model's predictions for the experiment's parameters and outcomes.

        With one free numeric parameter a slice plot is drawn, with two a
        contour plot, and with more an interactive contour plot. When no model
        is available, the observed (non-candidate) trials are plotted instead
        as a scatter plot or scatter matrix.

        Parameters
        ----------
        metricname : Optional[str]
            The name of the metric to plot. If None, the first outcome metric is used.
        slice_values : Optional[Dict[str, Any]]
            Dictionary of slice values for plotting. Parameters named here are
            excluded from the plotted axes and the values are forwarded to the
            Ax plotting functions. Default is None (no slicing).
        linear : bool
            Whether to plot a linear slice plot. Default is False.
            NOTE: this argument is currently not used by the method.
        Returns
        -------
        plotly.graph_objects.Figure: 
            Plotly figure of the model's predictions.
        """
        if slice_values is None:
            slice_values = {}
        if self.ax_client is None:
            self.initialize_ax_client()
            self.suggest_next_trials()
        cand_name = 'Candidate' if self._N == 1 else 'Candidates'
        mname = self.ax_client.objective_names[0] if metricname is None else metricname
        param_name = [
            name for name in self.names if name not in slice_values]
        par_numeric = [
            name for name in param_name if self._features[name]['type'] in ['int', 'float']]

        if self.model is None:
            # No model to query: fall back to plotting the observed data.
            completed_trials = self.ax_client.get_trials_data_frame()
            completed_trials = completed_trials[
                completed_trials['trial_status'] != 'CANDIDATE'].copy()
            if mname not in completed_trials.columns:
                return go.Figure()

            if len(par_numeric) == 0:
                return go.Figure()

            if len(par_numeric) == 1:
                fig = px.scatter(
                    completed_trials,
                    x=par_numeric[0],
                    y=mname,
                    title=f"Observed {mname} vs {par_numeric[0]}",
                )
            elif len(par_numeric) == 2:
                fig = px.scatter(
                    completed_trials,
                    x=par_numeric[0],
                    y=par_numeric[1],
                    color=mname,
                    color_continuous_scale="Viridis",
                    title=f"Observed {par_numeric[1]} vs {par_numeric[0]}",
                )
            else:
                fig = px.scatter_matrix(
                    completed_trials,
                    dimensions=par_numeric + [mname],
                    title=f"Observed relationships for {mname}",
                )

            return fig

        if len(par_numeric) == 1:
            fig = plot_slice(
                model=self.model,
                metric_name=mname,
                density=100,
                param_name=par_numeric[0],
                generator_runs_dict={cand_name: self.generator_run},
                slice_values=slice_values
            )
        elif len(par_numeric) == 2:
            fig = plot_contour(
                model=self.model,
                metric_name=mname,
                param_x=par_numeric[0],
                param_y=par_numeric[1],
                generator_runs_dict={cand_name: self.generator_run},
                slice_values=slice_values
            )
        else:
            # remove sliced parameters from par_numeric
            pars = [p for p in par_numeric if p not in slice_values]
            fig = interact_contour(
                model=self.model,
                generator_runs_dict={cand_name: self.generator_run},
                metric_name=mname,
                slice_values=slice_values,
                parameters_to_use=pars
            )

        plotly_fig = go.Figure(fig.data)
        all_trials = self.ax_client.get_trials_data_frame()
        completed_trials = all_trials[
            all_trials['trial_status'] != 'CANDIDATE'].copy()
        # compute distance to slice
        col_to_consider = completed_trials[list(slice_values.keys())]
        completed_trials.loc[:, 'signed_dist_to_slice'] = (
            # Sum of signed differences
            (col_to_consider - slice_values).sum(axis=1)
        )
        signed_dists = completed_trials['signed_dist_to_slice'].values
        positive_dists = signed_dists[signed_dists >= 0]
        negative_dists = signed_dists[signed_dists < 0]

        # Normalize positive distances to [0, 1]
        if len(positive_dists) > 0 and np.max(positive_dists) > 0:
            normalized_positive = positive_dists / np.max(positive_dists)
        else:
            normalized_positive = np.zeros_like(positive_dists)

        # Normalize negative distances to [-1, 0]
        if len(negative_dists) > 0 and np.min(negative_dists) < 0:
            normalized_negative = negative_dists / \
                np.abs(np.min(negative_dists))
        else:
            normalized_negative = np.zeros_like(negative_dists)

        # Combine the normalized distances
        normalized_signed_dists = np.zeros_like(signed_dists)
        normalized_signed_dists[signed_dists >= 0] = normalized_positive
        normalized_signed_dists[signed_dists < 0] = normalized_negative

        completed_trials.loc[:,
                             'normalized_signed_dist'] = normalized_signed_dists
        colormap = mpl.colormaps['bwr']
        # Map from [-1,1] to [0,1]
        normalized_values = (
            completed_trials['normalized_signed_dist'] + 1) / 2
        colors = [
            f"rgb({int(r*255)}, {int(g*255)}, {int(b*255)})"
            for r, g, b, _ in colormap(normalized_values)
        ]
        completed_trials.loc[:, 'colors'] = colors
        trials = all_trials[all_trials['trial_status'] == 'CANDIDATE']
        trials = trials[list(self.names)]

        # Both lookups are the same for every trace, so build them once.
        arm_to_color = dict(
            zip(completed_trials['arm_name'], completed_trials['colors']))
        # One hover label per candidate row, listing its parameter values.
        candidate_labels = [
            f"<b>Candidate {i+1}</b><br>"
            + '<br>'.join(
                [f'{col}: {val}' for col, val in trials.iloc[i].items()])
            for i in range(len(trials))
        ]

        for trace in plotly_fig.data:
            if trace.type == "contour":
                trace.colorscale = "viridis"
            if 'marker' in trace and trace.legendgroup != cand_name:
                if trace['text']:
                    # Recover the arm name of each point from its hover text
                    # to colour it by its distance to the slice.
                    arm_names = []
                    for text in trace['text']:
                        match = re.search(r'Arm (\d+_\d+)', text)
                        if match:
                            arm_names.append(match.group(1))
                    trace.marker.color = [arm_to_color[arm]
                                          for arm in arm_names]
                trace.marker.symbol = "circle"
                trace.marker.size = 10
                trace.marker.line.width = 2
                trace.marker.line.color = 'black'
                if trace.text is not None:
                    trace.text = [t.replace('Arm', '<b>Sample').replace(
                        "_0", "</b>") for t in trace.text]
            if trace.legendgroup == cand_name:
                trace.marker.line.color = 'red'
                trace.marker.color = "orange"
                trace.name = cand_name
                trace.marker.symbol = "x"
                trace.marker.size = 12
                trace.marker.opacity = 1
                trace.hoverinfo = "text"
                trace.hoverlabel = dict(bgcolor="#f8e3cd", font_color='black')
                trace.text = list(candidate_labels)

        # Same look for every axis of the (possibly two-panel) figure.
        axis_style = dict(
            showgrid=True,
            gridcolor="lightgray",
            zeroline=False,
            zerolinecolor="black",
            showline=True,
            linewidth=1,
            linecolor="black",
            mirror=True
        )
        plotly_fig.update_layout(
            plot_bgcolor="white",
            legend=dict(bgcolor='rgba(0,0,0,0)'),
            margin=dict(l=10, r=10, t=50, b=50),
            xaxis=dict(axis_style),
            yaxis=dict(axis_style),
            xaxis2=dict(axis_style),
            yaxis2=dict(axis_style),
        )
        return plotly_fig



[docs]
    def plot_optimization_trace(self, optimum=None):
        """
        Plot the optimization trace, showing the progress of the optimization over trials.

        Parameters
        ----------

        optimum : Optional[float]
            The optimal value to plot on the optimization trace.

        Returns
        -------

        plotly.graph_objects.Figure: 
            Plotly figure of the optimization trace, or None (after printing
            a message) when more than one outcome is defined.
        """
        if self.ax_client is None:
            self.initialize_ax_client()
        multi_objective = len(self._outcomes) > 1
        if multi_objective:
            print("Optimization trace is not available for multi-objective optimization.")
            return None

        ax_figure = self.ax_client.get_optimization_trace(
            objective_optimum=optimum)
        figure = go.Figure(ax_figure.data)
        for trace in figure.data:
            # add hover info
            trace.hoverinfo = "x+y"

        # White background, light gray grid and a black frame on both axes.
        axis_style = {
            "showgrid": True,
            "gridcolor": "lightgray",
            "zeroline": False,
            "zerolinecolor": "black",
            "showline": True,
            "linewidth": 1,
            "linecolor": "black",
            "mirror": True,
        }
        figure.update_layout(
            plot_bgcolor="white",
            legend={"bgcolor": 'rgba(0,0,0,0)'},
            margin={"l": 50, "r": 10, "t": 50, "b": 50},
            xaxis=dict(axis_style),
            yaxis=dict(axis_style),
        )
        return figure



[docs]
    def plot_feature_importances(self, relative=False):
        """
        Plot feature importances using Ax default Sensitivity Analysis cards
        (same analysis family as in Ax tutorials).

        One sensitivity figure is computed per objective. With several
        objectives the figures are merged into a single one with a dropdown
        menu, the first objective being shown initially.

        Parameters
        ----------
        relative : bool, optional
            Used only by the fallback Ax helper plot if analysis cards are
            unavailable. Default is False.

        Returns
        -------
        plotly.graph_objects.Figure:
            Plotly figure of feature importances, or None when there is no
            objective or when neither the analysis cards nor the fallback
            plot could be computed.
        """
        if self.ax_client is None:
            self.initialize_ax_client()
        if self.model is None:
            self.set_model()

        def _style_sensitivity_figure(fig):
            # Build lookup: full _OH_PARAM_N key → human-readable label
            oh_label_map = {}
            for name, info in self._features.items():
                if info['type'] == 'text':
                    for i, val in enumerate(info.get('range', [])):
                        oh_label_map[f"{name}_OH_PARAM_{i}"] = f"{name}: {val}"

            def resolve_label(raw):
                if not isinstance(raw, str):
                    return raw
                return oh_label_map.get(raw, raw)

            for trace in fig.data:
                trace_name = str(getattr(trace, "name", "") or "")
                if "Increases" in trace_name:
                    trace.marker.color = "#2ca02c"
                elif "Decreases" in trace_name:
                    trace.marker.color = "#d62728"

                if getattr(trace, "type", None) == "bar":
                    trace.width = 0.85
                    trace.offsetgroup = None
                    trace.alignmentgroup = None

                    # customdata[0] holds the FULL _OH_PARAM_N key (first pass)
                    # or already-resolved label (second pass) — resolve either way
                    cd = getattr(trace, "customdata", None)
                    if cd is not None:
                        cd_arr = np.array(cd, dtype=object)
                        resolved_labels = []
                        for ri in range(cd_arr.shape[0]):
                            resolved = resolve_label(str(cd_arr[ri, 0]))
                            cd_arr[ri, 0] = resolved
                            resolved_labels.append(resolved)
                        trace.customdata = cd_arr
                        # Override trace.y with the resolved labels from customdata
                        # (trace.y contains Ax's pre-truncated display strings — ignore them)
                        trace.y = tuple(resolved_labels)

                    # Fix hovertemplate:
                    # - remove the truncated_parameter_name=%{y} line (y is now resolved anyway)
                    # - keep parameter_name pointing at customdata[0] which is now resolved
                    ht = getattr(trace, "hovertemplate", None)
                    if ht is not None:
                        ht = ht.replace(
                            "truncated_parameter_name=%{y}<br>", "")
                        ht = ht.replace(
                            "truncated_parameter_name=%{y}<br />", "")
                        # if hovertemplate still uses %{y} for parameter_name, redirect to customdata
                        ht = ht.replace(
                            "parameter_name=%{y}", "parameter_name=%{customdata[0]}")
                        trace.hovertemplate = ht

            fig.update_layout(barmode="overlay")
            fig.update_yaxes(title_text="", automargin=True)
            fig.update_xaxes(title_text="Importance")
            return fig

        metric_names = list(self.ax_client.objective_names)
        if len(metric_names) == 0:
            return None

        figures = []
        labels = []
        for metric_name in metric_names:
            # Best effort: a metric whose card cannot be computed is skipped.
            try:
                card = SensitivityAnalysisPlot(metric_name=metric_name).compute(
                    experiment=self.ax_client.experiment,
                    generation_strategy=self.gs,
                    adapter=self.model,
                )
                figures.append(_style_sensitivity_figure(card.get_figure()))
                labels.append(metric_name)
            except (AttributeError, RuntimeError, ValueError, TypeError, KeyError):
                continue

        if len(figures) == 0:
            # Fallback to legacy Ax plot helper if analysis cards are unavailable.
            try:
                fig = plot_feature_importance_by_feature_plotly(
                    model=self.model,
                    relative=relative,
                )
                return _style_sensitivity_figure(fig)
            except (AttributeError, RuntimeError, ValueError, TypeError, KeyError):
                return None

        if len(figures) == 1:
            return _style_sensitivity_figure(figures[0])

        # Several objectives: stack all traces in one figure and remember
        # which index range belongs to which objective.
        merged = go.Figure()
        trace_blocks = []
        for fig in figures:
            start = len(merged.data)
            for tr in fig.data:
                merged.add_trace(tr)
            end = len(merged.data)
            trace_blocks.append((start, end))

        # Initially show only the traces of the first objective.
        first_start, first_end = trace_blocks[0]
        for j, trace in enumerate(merged.data):
            trace.visible = first_start <= j < first_end

        buttons = []
        for i, metric_name in enumerate(labels):
            vis = [False] * len(merged.data)
            start, end = trace_blocks[i]
            for j in range(start, end):
                vis[j] = True
            button = {
                "label": metric_name,
                "method": "update",
                "args": [
                    {"visible": vis},
                    {"title": f"Sensitivity Analysis for {metric_name}"},
                ],
            }
            buttons.append(button)

        merged.update_layout(figures[0].layout)
        merged.update_layout(
            updatemenus=[
                {
                    "x": 1.0,
                    "xanchor": "right",
                    "y": 1.15,
                    "yanchor": "top",
                    "buttons": buttons,
                }
            ]
        )
        return _style_sensitivity_figure(merged)



[docs]
    def compute_pareto_frontier(self):
        """
        Compute the Pareto frontier for multi-objective optimization experiments.

        The result is also stored in ``self.pareto_frontier``.

        Returns
        -------
        The Pareto frontier: the posterior Pareto frontier computed by Ax when
        ``self.Nmetrics`` is 2, otherwise the Pareto-optimal parameters
        reported by the Ax client. None (after printing a message) when fewer
        than two outcomes are defined.
        """
        if self.ax_client is None:
            self.initialize_ax_client()
        if len(self._outcomes) < 2:
            print("Pareto frontier is not available for single-objective optimization.")
            return None

        if self.Nmetrics != 2:
            # For 3+ objectives, keep Pareto-optimal points and visualize in plot_pareto_frontier.
            self.pareto_frontier = self.ax_client.get_pareto_optimal_parameters()
            return self.pareto_frontier

        experiment = self.ax_client.experiment
        objectives = experiment.optimization_config.objective.objectives
        metric_names = [objective.metric.name for objective in objectives]
        self.pareto_frontier = compute_posterior_pareto_frontier(
            experiment=experiment,
            data=experiment.fetch_data(),
            primary_objective=objectives[1].metric,
            secondary_objective=objectives[0].metric,
            absolute_metrics=metric_names,
            num_points=20,
        )
        return self.pareto_frontier



[docs]
    def plot_pareto_frontier(self, show_error_bars=True):
        """
        Plot the Pareto frontier for multi-objective optimization experiments.

        Uses the frontier stored in ``self.pareto_frontier``. With more than
        two metrics a scatter matrix of the objectives is drawn; otherwise the
        Ax Pareto frontier plot is used.

        Parameters
        ----------
        show_error_bars : bool, optional
            Whether to show error bars on the plot. Only applied to the
            two-metric plot. Default is True.

        Returns
        -------
        plotly.graph_objects.Figure: 
            Plotly figure of the Pareto frontier, or None when no frontier is
            stored or fewer than two objectives are found in it.
        """
        frontier = self.pareto_frontier
        if frontier is None:
            return None

        if self.Nmetrics > 2:
            frame = ordered_dict_to_dataframe(frontier)
            objective_names = []
            for name in self.ax_client.objective_names:
                if name in frame.columns:
                    objective_names.append(name)
            if len(objective_names) < 2:
                return None
            hover_columns = [
                name for name in self.names if name in frame.columns]
            fig = px.scatter_matrix(
                frame,
                dimensions=objective_names,
                hover_data=hover_columns,
                title="Pareto-optimal objective trade-offs",
            )
            fig.update_traces(diagonal_visible=False)
        else:
            fig = go.Figure(plot_pareto_frontier(frontier).data)

            # Strip the error bars of every trace that carries some.
            if not show_error_bars:
                for trace in fig.data:
                    for attribute in ('error_x', 'error_y'):
                        if getattr(trace, attribute, None) is not None:
                            setattr(trace, attribute, None)

        # White background, light gray grid and a black frame on both axes.
        axis_style = {
            "showgrid": True,
            "gridcolor": "lightgray",
            "zeroline": False,
            "zerolinecolor": "black",
            "showline": True,
            "linewidth": 1,
            "linecolor": "black",
            "mirror": True,
        }
        fig.update_layout(
            plot_bgcolor="white",
            legend={"bgcolor": 'rgba(0,0,0,0)'},
            margin={"l": 50, "r": 10, "t": 50, "b": 50},
            xaxis=dict(axis_style),
            yaxis=dict(axis_style),
        )
        return fig



[docs]
    def get_best_parameters(self):
        """
        Return the best parameters found by the optimization process.

        Falls back to the best observed rows when there is no model or when
        the Ax client returns no result.

        Returns
        -------

        pd.DataFrame: 
            DataFrame containing the best parameters and their outcomes.
        """
        if self.ax_client is None:
            self.initialize_ax_client()
        if self.model is None:
            return self._get_observed_best_parameters()

        if self.Nmetrics != 1:
            # Several metrics: report the Pareto-optimal points.
            pareto_points = self.ax_client.get_pareto_optimal_parameters()
            if pareto_points is None:
                return self._get_observed_best_parameters()
            return ordered_dict_to_dataframe(pareto_points)

        result = self.ax_client.get_best_parameters()
        incomplete = (
            result is None or result[0] is None or result[1] is None)
        if incomplete:
            return self._get_observed_best_parameters()

        parameters, outcomes = result[0], result[1]
        # Merge the first element of the outcomes into the parameters
        # to build a single row.
        parameters.update(outcomes[0])
        return pd.DataFrame(parameters, index=[0])



# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



[docs]
def flatten_dict(d, parent_key="", sep="_"):
    """
    Collapse a nested dictionary into a single level.

    Keys of nested dictionaries are prefixed with the keys of their parents,
    joined with `sep`. Values that are not dictionaries are kept as they are.
    """
    flat = {}
    for key, value in d.items():
        full_key = key if not parent_key else f"{parent_key}{sep}{key}"
        if not isinstance(value, dict):
            flat[full_key] = value
            continue
        # Recurse with the current key as the prefix of the children.
        flat.update(flatten_dict(value, full_key, sep=sep))
    return flat


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



[docs]
def ordered_dict_to_dataframe(data):
    """
    Convert an OrderedDict with arbitrary nesting to a DataFrame.

    After flattening, every value is expected to be indexable as
    ``(main_dict, (sub_dict, ...))``. Each value becomes one row made of the
    entries of ``main_dict`` followed by those of ``sub_dict``; rows are
    aligned by key, so all entries do not need to share the same key order.

    Parameters
    ----------
    data : dict
        Mapping to convert.

    Returns
    -------
    pd.DataFrame
        One row per flattened entry; an empty DataFrame for empty input.
    """
    rows = []
    for entry in flatten_dict(data).values():
        main_dict = entry[0]
        sub_dict = entry[1][0]
        rows.append({**main_dict, **sub_dict})
    # A list of dicts lets pandas match values to columns by key.
    return pd.DataFrame(rows)


# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #



[docs]
def _infer_column_type(series):
    """Map a pandas column to 'int', 'float' or 'text'; return None for any other dtype."""
    dtype = series.dtype
    if pd.api.types.is_integer_dtype(dtype):
        return 'int'
    if pd.api.types.is_float_dtype(dtype):
        return 'float'
    if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
        return 'text'
    return None


def read_experimental_data(file_path: str, out_pos=None) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]:
    """
    Read experimental data from a CSV file and format it into features and outcomes dictionaries.

    Column names are cleaned with `clean_names` before being used as keys.
    Columns that are neither integer, float nor text are skipped.

    Parameters
    ----------
    file_path (str) 
        Path to the CSV file containing experimental data.
    out_pos (int or list of int)
        Column indices of the outcome variables. Default is the last column.

    Returns
    -------
    Tuple[Dict[str, Dict[str, Any]], Dict[str, Dict[str, Any]]]
        Formatted features and outcomes dictionaries. Each feature holds its
        'type', 'data' and 'range' (unique values for text columns, [min, max]
        otherwise); each outcome holds its 'type' and 'data'.
    """
    if out_pos is None:
        out_pos = [-1]
    elif isinstance(out_pos, (int, np.integer)):
        # A bare index would select a single label instead of an Index.
        out_pos = [out_pos]

    data = pd.read_csv(file_path)
    data = clean_names(data, remove_special=True, case_type='preserve')
    outcome_column_name = data.columns[out_pos]
    features = data.loc[:, ~data.columns.isin(outcome_column_name)].copy()
    outcomes = data[outcome_column_name].copy()

    formatted_features = {}
    for column in features.columns:
        values = features[column]
        feature_type = _infer_column_type(values)
        if feature_type is None:
            continue
        if feature_type == 'text':
            value_range = values.unique().tolist()
        else:
            value_range = [values.min(), values.max()]
        formatted_features[column] = {'type': feature_type,
                                      'data': values.tolist(),
                                      'range': value_range}

    # same for outcomes with just type and data
    formatted_outcomes = {}
    for column in outcomes.columns:
        values = outcomes[column]
        outcome_type = _infer_column_type(values)
        if outcome_type is None:
            continue
        formatted_outcomes[column] = {'type': outcome_type,
                                      'data': values.tolist()}
    return formatted_features, formatted_outcomes