Skip to content

Commit

Permalink
Add zooming quantitative bandit model
Browse files Browse the repository at this point in the history
 ### Changes:
 * Added quantitative model support for continuous action spaces using the zooming algorithm.
 * Added base model classes to separate single/multi-objective and cost control models.
 * Refactored MAB classes to support both discrete and continuous action spaces.
 * Updated test suite with new test cases for quantitative models and refactored test suite for robustness.
 * Added serialization support for quantitative models.
 * Removed redundant predict_actions_randomly from cMAB.
  • Loading branch information
Shahar-Bar committed Feb 11, 2025
1 parent 64913ef commit 4401527
Show file tree
Hide file tree
Showing 26 changed files with 3,534 additions and 2,351 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/continuous_delivery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ jobs:

- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
export PATH="$HOME/.poetry/bin:$PATH"
else
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
fi
- name: Backup pyproject.toml
run: cp pyproject.toml pyproject.toml.bak
- name: Install project dependencies with Poetry
Expand Down
9 changes: 7 additions & 2 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
export PATH="$HOME/.poetry/bin:$PATH"
else
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
fi
- name: Install project dependencies with Poetry
run: |
poetry add pydantic@${{ matrix.pydantic-version }}
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ MANIFEST

# poetry
poetry.lock

# qodo gen
.qodo
93 changes: 89 additions & 4 deletions pybandits/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
# SOFTWARE.


from typing import Any, Dict, List, NewType, Tuple, Union
from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union

from typing_extensions import Self

from pybandits.pydantic_version_compatibility import (
PYDANTIC_VERSION_1,
Expand All @@ -34,24 +36,52 @@
)

ActionId = NewType("ActionId", constr(min_length=1))
QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]
UnifiedActionId = Union[ActionId, QuantitativeActionId]
Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
Probability = NewType("Probability", Float01)
ProbabilityWeight = Tuple[Probability, float]
MOProbability = List[Probability]
MOProbabilityWeight = List[ProbabilityWeight]
# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
QuantitativeProbability = Tuple[Tuple[Tuple[Float01, ...], Probability], ...]
QuantitativeProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], ProbabilityWeight], ...]
QuantitativeMOProbability = Tuple[Tuple[Tuple[Float01, ...], List[Probability]], ...]
QuantitativeMOProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], List[ProbabilityWeight]], ...]
UnifiedProbability = Union[Probability, QuantitativeProbability]
UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]
# SmabPredictions is a tuple of two lists: the first list contains the selected action ids,
# and the second list contains their associated probabilities
SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]])
SmabPredictions = NewType(
"SmabPredictions",
Tuple[
List[UnifiedActionId],
Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]],
],
)
# CmabPredictions is a tuple of three lists: the first list contains the selected action ids,
# the second list contains their associated probabilities,
# and the third list contains their associated weighted sums
CmabPredictions = NewType(
"CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]]
"CmabPredictions",
Union[
Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]],
Tuple[
List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]]
],
],
)
Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions])
BinaryReward = NewType("BinaryReward", conint(ge=0, le=1))
ActionRewardLikelihood = NewType(
"ActionRewardLikelihood",
Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
)
Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]]
ACTION_IDS_PREFIX = "action_ids_"
QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}"


class _classproperty(property):
Expand All @@ -74,6 +104,18 @@ def __init__(self, **data):
def model_post_init(self, __context: Any) -> None:
pass

def _validate_params_lengths(
self,
**kwargs,
):
"""
Verify that the given keyword arguments have the same length.
"""
reference = len(next(iter(kwargs.values())))
for k, v in kwargs.items():
if v is not None and len(v) != reference:
raise AttributeError(f"Shape mismatch: {k} should have the same length as the other parameters.")

def _apply_version_adjusted_method(self, v2_method_name: str, v1_method_name: str, **kwargs) -> Any:
"""
Apply the method with the given name, adjusting for the pydantic version.
Expand Down Expand Up @@ -109,3 +151,46 @@ def model_fields(cls) -> Dict[str, Any]:
The model fields.
"""
return cls.__fields__

def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self:
    """
    Return a copy of this model, optionally overriding selected field values.

    Pydantic-v1-compatible alias exposing the v2-style method name.

    Parameters
    ----------
    update : Mapping[str, Any], optional
        Field values to override in the copy, by default None.
    deep : bool, optional
        Whether to deep-copy the field values, by default False.

    Returns
    -------
    Self
        The new instance of the model.
    """
    # Delegate to pydantic v1's ``copy`` under the v2 method name.
    duplicate = self.copy(update=update, deep=deep)
    return duplicate

@classmethod
def model_validate(cls, obj: Any) -> Self:
    """
    Validate an object and build a PyBandits BaseModel instance from it.

    Pydantic-v1-compatible alias exposing the v2-style method name.

    Parameters
    ----------
    obj : Any
        The object to validate. Use a state dictionary to generate the model
        from its serialized state.

    Returns
    -------
    Self
        The validated model instance.

    Raises
    ------
    ValidationError
        If the object could not be validated.
    """
    # Delegate to pydantic v1's ``parse_obj`` under the v2 method name.
    validated = cls.parse_obj(obj)
    return validated
128 changes: 128 additions & 0 deletions pybandits/base_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from abc import ABC, abstractmethod
from typing import Callable, List, Union

import numpy as np

from pybandits.base import (
BinaryReward,
MOProbability,
Probability,
ProbabilityWeight,
PyBanditsBaseModel,
QuantitativeMOProbability,
QuantitativeProbability,
QuantitativeProbabilityWeight,
)
from pybandits.pydantic_version_compatibility import NonNegativeFloat


class BaseModel(PyBanditsBaseModel, ABC):
    """
    Abstract base class modeling the prior distributions of standard actions and
    quantitative actions, covering both single- and multi-objective variants.
    """

    @abstractmethod
    def sample_proba(
        self, **kwargs
    ) -> Union[
        List[Probability],
        List[MOProbability],
        List[ProbabilityWeight],
        List[QuantitativeProbability],
        List[QuantitativeMOProbability],
        List[QuantitativeProbabilityWeight],
    ]:
        """
        Sample the probability of getting a positive reward.

        Returns
        -------
        Union[List[Probability], List[MOProbability], List[ProbabilityWeight],
              List[QuantitativeProbability], List[QuantitativeMOProbability],
              List[QuantitativeProbabilityWeight]]
            Sampled probabilities; the concrete element type depends on the
            subclass (multi-objective, weighted, and/or quantitative variants).
        """

    @abstractmethod
    def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs):
        """
        Update the model parameters.

        Parameters
        ----------
        rewards : Union[List[BinaryReward], List[List[BinaryReward]]]
            The binary reward for each sample. If a nested list, len() should
            follow the shape (n_samples, n_objectives).
            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
            rewards = [1, 0, 1, 1, 1, ...]
            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
            rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelSO(BaseModel, ABC):
    """
    Abstract base class modeling the prior distributions of standard actions and
    quantitative actions for a single objective.
    """

    @abstractmethod
    def sample_proba(
        self, **kwargs
    ) -> Union[
        List[Probability], List[ProbabilityWeight], List[QuantitativeProbability], List[QuantitativeProbabilityWeight]
    ]:
        """
        Sample the probability of getting a positive reward.

        Returns
        -------
        Union[List[Probability], List[ProbabilityWeight],
              List[QuantitativeProbability], List[QuantitativeProbabilityWeight]]
            Sampled single-objective probabilities; weighted and/or quantitative
            variants depend on the subclass.
        """

    @abstractmethod
    def update(self, rewards: List[BinaryReward], **kwargs):
        """
        Update the model parameters.

        Parameters
        ----------
        rewards : List[BinaryReward]
            The binary reward for each sample, e.g. rewards = [1, 0, 1, 1, 1, ...]
        """


class BaseModelMO(BaseModel, ABC):
    """
    Abstract base class modeling the prior distributions of standard actions and
    quantitative actions for multiple objectives.

    Parameters
    ----------
    models : List[BaseModelSO]
        The list of single-objective models, one per objective.
    """

    # One single-objective model per objective.
    models: List[BaseModelSO]

    @abstractmethod
    def sample_proba(self, **kwargs) -> Union[List[MOProbability], List[QuantitativeMOProbability]]:
        """
        Sample the probability of getting a positive reward for each objective.

        Returns
        -------
        Union[List[MOProbability], List[QuantitativeMOProbability]]
            Per-sample lists of probabilities, one entry per objective.
        """

    @abstractmethod
    def update(self, rewards: List[List[BinaryReward]], **kwargs):
        """
        Update the model parameters.

        Parameters
        ----------
        rewards : List[List[BinaryReward]]
            The binary rewards for each sample; len() should follow the shape
            (n_samples, n_objectives), e.g. (with n_objectives=2):
            rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelCC(PyBanditsBaseModel, ABC):
    """
    Mixin class to model action cost.

    Parameters
    ----------
    cost : Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
        Cost associated with the action: either a fixed non-negative value, or a
        callable mapping a value (presumably the action quantity — confirm with
        callers) to a non-negative cost.
    """

    # Fixed cost, or a function of a scalar/vector input returning a cost.
    cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
Loading

0 comments on commit 4401527

Please sign in to comment.