Skip to content

Commit

Permalink
Add zooming quantitative bandit model
Browse files Browse the repository at this point in the history
 ### Changes:
 * Added quantitative model support for continuous action spaces using the zooming algorithm.
 * Added base model classes to separate single/multi-objective and cost control models.
 * Refactored MAB classes to support both discrete and continuous action spaces.
 * Updated test suite with new test cases for quantitative models and refactored test suite for robustness.
 * Added serialization support for quantitative models.
 * Removed redundant predict_actions_randomly from cMAB.
  • Loading branch information
Shahar-Bar committed Feb 11, 2025
1 parent 64913ef commit 4401527
Show file tree
Hide file tree
Showing 26 changed files with 3,534 additions and 2,351 deletions.
9 changes: 7 additions & 2 deletions .github/workflows/continuous_delivery.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@ jobs:

- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
export PATH="$HOME/.poetry/bin:$PATH"
else
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
fi
- name: Backup pyproject.toml
run: cp pyproject.toml pyproject.toml.bak
- name: Install project dependencies with Poetry
Expand Down
9 changes: 7 additions & 2 deletions .github/workflows/continuous_integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,13 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install Poetry
run: |
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
if [[ "${{ matrix.python-version }}" == "3.8" ]]; then
curl -sSL https://install.python-poetry.org | python3 - --version 1.8.0
export PATH="$HOME/.poetry/bin:$PATH"
else
curl -sSL https://install.python-poetry.org | python3 -
export PATH="$HOME/.poetry/bin:$PATH"
fi
- name: Install project dependencies with Poetry
run: |
poetry add pydantic@${{ matrix.pydantic-version }}
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,6 @@ MANIFEST

# poetry
poetry.lock

# qodo gen
.qodo
93 changes: 89 additions & 4 deletions pybandits/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
# SOFTWARE.


from typing import Any, Dict, List, NewType, Tuple, Union
from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union

from typing_extensions import Self

from pybandits.pydantic_version_compatibility import (
PYDANTIC_VERSION_1,
Expand All @@ -34,24 +36,52 @@
)

ActionId = NewType("ActionId", constr(min_length=1))
QuantitativeActionId = Tuple[ActionId, Tuple[float, ...]]
UnifiedActionId = Union[ActionId, QuantitativeActionId]
Float01 = NewType("Float_0_1", confloat(ge=0, le=1))
Probability = NewType("Probability", Float01)
ProbabilityWeight = Tuple[Probability, float]
MOProbability = List[Probability]
MOProbabilityWeight = List[ProbabilityWeight]
# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
QuantitativeProbability = Tuple[Tuple[Tuple[Float01, ...], Probability], ...]
QuantitativeProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], ProbabilityWeight], ...]
QuantitativeMOProbability = Tuple[Tuple[Tuple[Float01, ...], List[Probability]], ...]
QuantitativeMOProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], List[ProbabilityWeight]], ...]
UnifiedProbability = Union[Probability, QuantitativeProbability]
UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
UnifiedMOProbabilityWeight = Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]
# SmabPredictions is a tuple of two lists: the first list contains the selected action ids,
# and the second list contains their associated probabilities
SmabPredictions = NewType("SmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]]])
SmabPredictions = NewType(
"SmabPredictions",
Tuple[
List[UnifiedActionId],
Union[List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, MOProbability]]],
],
)
# CmabPredictions is a tuple of three lists: the first list contains the selected action ids,
# the second list contains their associated probabilities,
# and the third list contains their associated weighted sums
CmabPredictions = NewType(
"CmabPredictions", Tuple[List[ActionId], List[Dict[ActionId, Probability]], List[Dict[ActionId, float]]]
"CmabPredictions",
Union[
Tuple[List[UnifiedActionId], List[Dict[UnifiedActionId, Probability]], List[Dict[UnifiedActionId, float]]],
Tuple[
List[UnifiedActionId], List[Dict[UnifiedActionId, MOProbability]], List[Dict[UnifiedActionId, List[float]]]
],
],
)
Predictions = NewType("Predictions", Union[SmabPredictions, CmabPredictions])
BinaryReward = NewType("BinaryReward", conint(ge=0, le=1))
ActionRewardLikelihood = NewType(
"ActionRewardLikelihood",
Union[Dict[ActionId, float], Dict[ActionId, Probability], Dict[ActionId, List[Probability]]],
Union[Dict[UnifiedActionId, float], Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
)
Serializable = Union[str, int, float, bool, None, List["Serializable"], Dict[str, "Serializable"]]
ACTION_IDS_PREFIX = "action_ids_"
QUANTITATIVE_ACTION_IDS_PREFIX = f"quantitative_{ACTION_IDS_PREFIX}"


class _classproperty(property):
Expand All @@ -74,6 +104,18 @@ def __init__(self, **data):
def model_post_init(self, __context: Any) -> None:
pass

def _validate_params_lengths(
self,
**kwargs,
):
"""
Verify that the given keyword arguments have the same length.
"""
reference = len(next(iter(kwargs.values())))
for k, v in kwargs.items():
if v is not None and len(v) != reference:
raise AttributeError(f"Shape mismatch: {k} should have the same length as the other parameters.")

def _apply_version_adjusted_method(self, v2_method_name: str, v1_method_name: str, **kwargs) -> Any:
"""
Apply the method with the given name, adjusting for the pydantic version.
Expand Down Expand Up @@ -109,3 +151,46 @@ def model_fields(cls) -> Dict[str, Any]:
The model fields.
"""
return cls.__fields__

def model_copy(self, *, update: Optional[Mapping[str, Any]] = None, deep: bool = False) -> Self:
    """
    Return a copy of this model, optionally overriding selected field values.

    Pydantic-v1-compatible alias exposing the v2-style method name.

    Parameters
    ----------
    update : Mapping[str, Any], optional
        Field values to override in the copy, by default None.
    deep : bool, optional
        Whether to deep-copy the field values, by default False.

    Returns
    -------
    Self
        The new instance of the model.
    """
    # Delegate to pydantic v1's ``copy`` under the v2 method name.
    duplicate = self.copy(update=update, deep=deep)
    return duplicate

@classmethod
def model_validate(cls, obj: Any) -> Self:
    """
    Validate an object and build a PyBandits BaseModel instance from it.

    Pydantic-v1-compatible alias exposing the v2-style method name.

    Parameters
    ----------
    obj : Any
        The object to validate. Use a state dictionary to generate the model
        from its serialized state.

    Returns
    -------
    Self
        The validated model instance.

    Raises
    ------
    ValidationError
        If the object could not be validated.
    """
    # Delegate to pydantic v1's ``parse_obj`` under the v2 method name.
    validated = cls.parse_obj(obj)
    return validated
128 changes: 128 additions & 0 deletions pybandits/base_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from abc import ABC, abstractmethod
from typing import Callable, List, Union

import numpy as np

from pybandits.base import (
BinaryReward,
MOProbability,
Probability,
ProbabilityWeight,
PyBanditsBaseModel,
QuantitativeMOProbability,
QuantitativeProbability,
QuantitativeProbabilityWeight,
)
from pybandits.pydantic_version_compatibility import NonNegativeFloat


class BaseModel(PyBanditsBaseModel, ABC):
    """
    Abstract base class modeling the prior distributions of standard actions and
    quantitative actions, covering both single- and multi-objective variants.
    """

    @abstractmethod
    def sample_proba(
        self, **kwargs
    ) -> Union[
        List[Probability],
        List[MOProbability],
        List[ProbabilityWeight],
        List[QuantitativeProbability],
        List[QuantitativeMOProbability],
        List[QuantitativeProbabilityWeight],
    ]:
        """
        Sample the probability of getting a positive reward.

        Returns
        -------
        Union[List[Probability], List[MOProbability], List[ProbabilityWeight],
              List[QuantitativeProbability], List[QuantitativeMOProbability],
              List[QuantitativeProbabilityWeight]]
            Sampled probabilities; the concrete element type depends on the
            subclass (multi-objective, weighted, and/or quantitative variants).
        """

    @abstractmethod
    def update(self, rewards: Union[List[BinaryReward], List[List[BinaryReward]]], **kwargs):
        """
        Update the model parameters.

        Parameters
        ----------
        rewards : Union[List[BinaryReward], List[List[BinaryReward]]]
            The binary reward for each sample. If a nested list, len() should
            follow the shape (n_samples, n_objectives).
            If strategy is not MultiObjectiveBandit, rewards should be a list, e.g.
            rewards = [1, 0, 1, 1, 1, ...]
            If strategy is MultiObjectiveBandit, rewards should be a list of list, e.g. (with n_objectives=2):
            rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelSO(BaseModel, ABC):
    """
    Abstract base class modeling the prior distributions of standard actions and
    quantitative actions for a single objective.
    """

    @abstractmethod
    def sample_proba(
        self, **kwargs
    ) -> Union[
        List[Probability], List[ProbabilityWeight], List[QuantitativeProbability], List[QuantitativeProbabilityWeight]
    ]:
        """
        Sample the probability of getting a positive reward.

        Returns
        -------
        Union[List[Probability], List[ProbabilityWeight],
              List[QuantitativeProbability], List[QuantitativeProbabilityWeight]]
            Sampled single-objective probabilities; weighted and/or quantitative
            variants depend on the subclass.
        """

    @abstractmethod
    def update(self, rewards: List[BinaryReward], **kwargs):
        """
        Update the model parameters.

        Parameters
        ----------
        rewards : List[BinaryReward]
            The binary reward for each sample, e.g. rewards = [1, 0, 1, 1, 1, ...]
        """


class BaseModelMO(BaseModel, ABC):
    """
    Abstract base class modeling the prior distributions of standard actions and
    quantitative actions for multiple objectives.

    Parameters
    ----------
    models : List[BaseModelSO]
        The list of single-objective models, one per objective.
    """

    # One single-objective model per objective.
    models: List[BaseModelSO]

    @abstractmethod
    def sample_proba(self, **kwargs) -> Union[List[MOProbability], List[QuantitativeMOProbability]]:
        """
        Sample the probability of getting a positive reward for each objective.

        Returns
        -------
        Union[List[MOProbability], List[QuantitativeMOProbability]]
            Per-sample lists of probabilities, one entry per objective.
        """

    @abstractmethod
    def update(self, rewards: List[List[BinaryReward]], **kwargs):
        """
        Update the model parameters.

        Parameters
        ----------
        rewards : List[List[BinaryReward]]
            The binary rewards for each sample; len() should follow the shape
            (n_samples, n_objectives), e.g. (with n_objectives=2):
            rewards = [[1, 1], [1, 0], [1, 1], [1, 0], [1, 1], ...]
        """


class BaseModelCC(PyBanditsBaseModel, ABC):
    """
    Mixin class to model action cost.

    Parameters
    ----------
    cost : Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
        Cost associated with the action: either a fixed non-negative value, or a
        callable mapping a value (presumably the action quantity — confirm with
        callers) to a non-negative cost.
    """

    # Fixed cost, or a function of a scalar/vector input returning a cost.
    cost: Union[NonNegativeFloat, Callable[[Union[float, np.ndarray]], NonNegativeFloat]]
Loading

0 comments on commit 4401527

Please sign in to comment.