objective

Objective module public API.

This module provides:

  • Base interfaces: Objective, Policy
  • Sampling: sample_states, default_rng
  • Concrete policies: ConstantPolicy, LinearPolicy, SoftmaxPolicy
  • Concrete objectives: FixedRegressionObjective, PlantedLogisticObjective, ModelBasedObjective
  • Utility: optimal_u
 1"""Objective module public API.
 2
 3This module provides:
 4- Base interfaces: Objective, Policy
 5- Sampling: sample_states, default_rng
 6- Concrete policies: ConstantPolicy, LinearPolicy, SoftmaxPolicy
 7- Concrete objectives: FixedRegressionObjective, PlantedLogisticObjective,
 8  ModelBasedObjective
 9- Utility: optimal_u
10"""
11
12from objective.base import (
13    Objective,
14    Policy,
15    default_rng,
16    sample_states,
17)
18from objective.objectives import (
19    FixedRegressionObjective,
20    ModelBasedObjective,
21    PlantedLogisticObjective,
22)
23from objective.policy import (
24    ConstantPolicy,
25    LinearPolicy,
26    SoftmaxPolicy,
27    policy_from_kind,
28)
29from objective.utils import optimal_u
30
31__all__ = [
32    # Base interfaces
33    "Objective",
34    "Policy",
35    "default_rng",
36    "sample_states",
37    # Concrete policies
38    "ConstantPolicy",
39    "LinearPolicy",
40    "SoftmaxPolicy",
41    "policy_from_kind",
42    # Concrete objectives
43    "FixedRegressionObjective",
44    "ModelBasedObjective",
45    "PlantedLogisticObjective",
46    # Utility
47    "optimal_u",
48]
class Objective:
    """Theta-space objective: $$J(\\theta) = \\mathbb{E}_x[f(\\pi_\\theta(x); x)]$$.

    Abstract interface: subclasses supply both ``value`` and ``grad``,
    each evaluated on a batch of sampled states.
    """

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Return the mean objective value of ``theta`` over ``x_batch``."""
        raise NotImplementedError

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the gradient with respect to theta, evaluated on ``x_batch``."""
        raise NotImplementedError

Theta-space objective: $$J(\theta) = \mathbb{E}_x[f(\pi_\theta(x); x)]$$.

def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> float:
38    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
39        """Return mean objective value for ``theta`` on ``x_batch``."""
40        raise NotImplementedError

Return mean objective value for theta on x_batch.

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
42    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
43        """Return theta-gradient for ``theta`` on ``x_batch``."""
44        raise NotImplementedError

Return theta-gradient for theta on x_batch.

class Policy:
    """Policy interface: $$u = \\pi_\\theta(x)$$ with gradient $$\\partial u / \\partial \\theta$$.

    Abstract interface: subclasses supply both ``value`` and ``grad``.
    """

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return per-sample actions, shape (n_samples,)."""
        raise NotImplementedError

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return per-sample policy Jacobians, shape (n_samples, theta_dim)."""
        raise NotImplementedError

Policy interface: $$u = \pi_\theta(x)$$ with gradient $$\partial u / \partial \theta$$.

def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
26    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
27        """Return action values for batch, shape (n_samples,)."""
28        raise NotImplementedError

Return action values for batch, shape (n_samples,).

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
30    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
31        """Return policy gradients for batch, shape (n_samples, theta_dim)."""
32        raise NotImplementedError

Return policy gradients for batch, shape (n_samples, theta_dim).

def default_rng(seed: int | None = None) -> numpy.random.Generator:
11def default_rng(seed: Optional[int] = None) -> np.random.Generator:
12    """Return a NumPy random generator, optionally seeded."""
13    return np.random.default_rng(seed)

Return a NumPy random generator, optionally seeded.

def sample_states(rng: numpy.random.Generator, n: int, dim: int) -> numpy.ndarray:
def sample_states(rng: np.random.Generator, n: int, dim: int) -> np.ndarray:
    """Sample ``n`` i.i.d. state vectors from $$\\mathcal{N}(0, I)$$.

    Returns an array of shape ``(n, dim)``; both sizes must be positive.
    """
    if min(n, dim) <= 0:
        raise ValueError("n and dim must be positive.")
    draws = rng.normal(0.0, 1.0, size=(n, dim))
    return draws.astype(float)

Sample n state vectors from $$\mathcal{N}(0, I)$$, shape (n, dim).

@dataclass(frozen=True)
class ConstantPolicy(Policy):
    """Constant policy: $$u = \\theta_0$$, ignoring the state $$x$$ entirely."""

    kind: str = _POLICY_CONSTANT

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Broadcast the first theta entry over the batch, shape (n_samples,)."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        params = np.asarray(theta, dtype=float)
        if params.size < 1:
            raise ValueError("theta must have at least one element.")
        action = float(params[0])
        return np.full(states.shape[0], action, dtype=float)

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the basis vector e_1 for every sample, shape (n_samples, theta_dim)."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        params = np.asarray(theta, dtype=float)
        if params.size < 1:
            raise ValueError("theta must have at least one element.")
        # Only the first coordinate influences u, so du/dtheta = [1, 0, ...].
        jac = np.zeros((states.shape[0], params.size), dtype=float)
        jac[:, 0] = 1.0
        return jac

Constant policy: $$u = \theta_0$$, ignores state $$x$$.

ConstantPolicy(kind: str = 'constant')
kind: str = 'constant'
def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
35    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
36        """Return constant action for all samples, shape (n_samples,)."""
37        x_arr = np.asarray(x_batch, dtype=float)
38        if x_arr.ndim != 2:
39            raise ValueError("x_batch must be a 2D array.")
40        theta_arr = np.asarray(theta, dtype=float)
41        if theta_arr.size < 1:
42            raise ValueError("theta must have at least one element.")
43        return np.full(x_arr.shape[0], float(theta_arr[0]), dtype=float)

Return constant action for all samples, shape (n_samples,).

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
45    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
46        """Return gradient [1, 0, ...] for all samples, shape (n_samples, theta_dim)."""
47        x_arr = np.asarray(x_batch, dtype=float)
48        if x_arr.ndim != 2:
49            raise ValueError("x_batch must be a 2D array.")
50        theta_arr = np.asarray(theta, dtype=float)
51        if theta_arr.size < 1:
52            raise ValueError("theta must have at least one element.")
53        n_samples = x_arr.shape[0]
54        grad = np.zeros((n_samples, theta_arr.size), dtype=float)
55        grad[:, 0] = 1.0
56        return grad

Return gradient [1, 0, ...] for all samples, shape (n_samples, theta_dim).

@dataclass(frozen=True)
class LinearPolicy(Policy):
    """Linear policy: $$u = \\theta^\\top \\phi(x)$$ with features $$\\phi(x) = [1, x]$$."""

    kind: str = _POLICY_LINEAR

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return $$\\phi(x)^\\top \\theta$$ per sample, shape (n_samples,)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        # Extra trailing theta entries (beyond the feature count) are ignored.
        return (phi @ params[:n_feat]).astype(float)

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the features phi(x) per sample, shape (n_samples, theta_dim)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        # du/dtheta is phi(x) on the used coordinates, zero on any extras.
        jac = np.zeros((phi.shape[0], params.size), dtype=float)
        jac[:, :n_feat] = phi
        return jac

Linear policy: $$u = \theta^\top \phi(x)$$ where $$\phi(x) = [1, x]$$.

LinearPolicy(kind: str = 'linear')
kind: str = 'linear'
def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
65    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
66        """Return linear action values, shape (n_samples,)."""
67        features = _phi(x_batch)
68        theta_arr = np.asarray(theta, dtype=float)
69        if theta_arr.size < features.shape[1]:
70            raise ValueError("theta must have at least state_dim + 1 elements.")
71        return (features @ theta_arr[: features.shape[1]]).astype(float)

Return linear action values, shape (n_samples,).

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
73    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
74        """Return gradient phi(x) for all samples, shape (n_samples, theta_dim)."""
75        features = _phi(x_batch)
76        theta_arr = np.asarray(theta, dtype=float)
77        if theta_arr.size < features.shape[1]:
78            raise ValueError("theta must have at least state_dim + 1 elements.")
79        n_samples = features.shape[0]
80        grad = np.zeros((n_samples, theta_arr.size), dtype=float)
81        grad[:, : features.shape[1]] = features
82        return grad

Return gradient phi(x) for all samples, shape (n_samples, theta_dim).

@dataclass(frozen=True)
class SoftmaxPolicy(Policy):
    """Sigmoid-squashed policy: $$u = 0.5 + \\sigma(\\theta^\\top \\phi(x)) \\in (0.5, 1.5)$$."""

    kind: str = _POLICY_SOFTMAX

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return squashed actions in (0.5, 1.5), shape (n_samples,)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        logits = phi @ params[:n_feat]
        return (0.5 + _sigmoid(logits)).astype(float)

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return rows $$\\sigma'(z)\\,\\phi(x)$$, shape (n_samples, theta_dim)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        logits = phi @ params[:n_feat]
        activation = _sigmoid(logits)
        # d sigma / dz = sigma * (1 - sigma); the additive 0.5 offset drops out.
        slope = activation * (1.0 - activation)
        jac = np.zeros((phi.shape[0], params.size), dtype=float)
        jac[:, :n_feat] = slope[:, None] * phi
        return jac

Softmax policy: $$u = 0.5 + \sigma(\theta^\top \phi(x)) \in (0.5, 1.5)$$.

SoftmaxPolicy(kind: str = 'softmax')
kind: str = 'softmax'
def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
91    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
92        """Return softmax action values, shape (n_samples,)."""
93        features = _phi(x_batch)
94        theta_arr = np.asarray(theta, dtype=float)
95        if theta_arr.size < features.shape[1]:
96            raise ValueError("theta must have at least state_dim + 1 elements.")
97        z = features @ theta_arr[: features.shape[1]]
98        return (0.5 + _sigmoid(z)).astype(float)

Return softmax action values, shape (n_samples,).

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
100    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
101        """Return gradient sigma'(z) * phi(x) for all samples, shape (n_samples, theta_dim)."""
102        features = _phi(x_batch)
103        theta_arr = np.asarray(theta, dtype=float)
104        if theta_arr.size < features.shape[1]:
105            raise ValueError("theta must have at least state_dim + 1 elements.")
106        z = features @ theta_arr[: features.shape[1]]
107        sigma = _sigmoid(z)
108        du_dz = sigma * (1.0 - sigma)
109        n_samples = features.shape[0]
110        grad = np.zeros((n_samples, theta_arr.size), dtype=float)
111        grad[:, : features.shape[1]] = du_dz[:, None] * features
112        return grad

Return gradient sigma'(z) * phi(x) for all samples, shape (n_samples, theta_dim).

def policy_from_kind(kind: str) -> ConstantPolicy | LinearPolicy | SoftmaxPolicy:
    """Create a policy instance from a kind string.

    Raises ``ValueError`` for any kind outside the known set.
    """
    factories = {
        _POLICY_CONSTANT: ConstantPolicy,
        _POLICY_LINEAR: LinearPolicy,
        _POLICY_SOFTMAX: SoftmaxPolicy,
    }
    factory = factories.get(kind)
    if factory is None:
        raise ValueError(f"Policy kind must be one of {_POLICY_KINDS}.")
    return factory()

Create a policy instance from a kind string.

@dataclass(frozen=True)
class FixedRegressionObjective(Objective):
    """Pricing objective: $$f(u; x) = a(x,u)(\\ell(x) - r(u))$$.

    Components: $$a = \\sigma(\\beta_1^\\top x + \\beta_2 u)$$,
    $$\\ell = \\beta_3^\\top x$$, $$r = \\beta_4 u$$.
    Theta-gradients are obtained via the chain rule through the attached policy.
    """

    policy: Policy
    beta_1: np.ndarray
    beta_2: float
    beta_3: np.ndarray
    beta_4: float

    def __post_init__(self) -> None:
        # Coerce to canonical numeric types first, then enforce sign constraints.
        accept_weights = np.asarray(self.beta_1, dtype=float)
        accept_slope = float(self.beta_2)
        loss_weights = np.asarray(self.beta_3, dtype=float)
        revenue_rate = float(self.beta_4)
        if np.any(accept_weights <= 0.0):
            raise ValueError("beta_1 entries must be positive.")
        if accept_slope >= 0.0:
            raise ValueError(
                "beta_2 must be negative; acceptance probability should decrease as policy value increases."
            )
        if np.any(loss_weights <= 0.0):
            raise ValueError("beta_3 entries must be positive.")
        if revenue_rate <= 0.0:
            raise ValueError("beta_4 must be positive.")
        # Frozen dataclass: write coerced values back via object.__setattr__.
        object.__setattr__(self, "beta_1", accept_weights)
        object.__setattr__(self, "beta_2", accept_slope)
        object.__setattr__(self, "beta_3", loss_weights)
        object.__setattr__(self, "beta_4", revenue_rate)

    @classmethod
    def from_parameters(
        cls,
        policy: Policy,
        beta_1: np.ndarray | Sequence[float],
        beta_2: float,
        beta_3: np.ndarray | Sequence[float],
        beta_4: float,
    ) -> "FixedRegressionObjective":
        """Build an objective, coercing raw parameter values."""
        return cls(
            policy=policy,
            beta_1=np.asarray(beta_1, dtype=float),
            beta_2=float(beta_2),
            beta_3=np.asarray(beta_3, dtype=float),
            beta_4=float(beta_4),
        )

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Return the mean objective value over the batch."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        actions = self.policy.value(theta, states)
        return float(np.mean(self._value_batch(states, actions)))

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the theta-gradient via chain rule: df/dtheta = df/du * du/dtheta."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        params = np.asarray(theta, dtype=float)
        actions = self.policy.value(params, states)
        du_grad = self._grad_u_batch(states, actions)
        return _theta_grad_from_u_grad(self.policy, params, states, du_grad)

    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
        """Return the mean objective value with every sample forced to action ``u``."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        fixed_actions = np.full(states.shape[0], float(u), dtype=float)
        return float(np.mean(self._value_batch(states, fixed_actions)))

    def _value_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Return per-sample values a(x,u) * (ell(x) - r(u))."""
        accept_logit = x_array @ self.beta_1[: x_array.shape[1]] + self.beta_2 * u_array
        acceptance = _sigmoid(accept_logit)
        expected_loss = x_array @ self.beta_3[: x_array.shape[1]]
        return acceptance * (expected_loss - self.beta_4 * u_array)

    def _grad_u_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Return per-sample df/du via the product rule."""
        accept_logit = x_array @ self.beta_1[: x_array.shape[1]] + self.beta_2 * u_array
        acceptance = _sigmoid(accept_logit)
        expected_loss = x_array @ self.beta_3[: x_array.shape[1]]
        margin = expected_loss - self.beta_4 * u_array
        # d a / d u = a (1 - a) * beta_2 (logistic derivative times inner slope).
        d_accept = acceptance * (1.0 - acceptance) * self.beta_2
        return d_accept * margin - acceptance * self.beta_4

Pricing objective: $$f(u; x) = a(x,u)(\ell(x) - r(u))$$.

Components: $$a = \sigma(\beta_1^\top x + \beta_2 u)$$, $$\ell = \beta_3^\top x$$, $$r = \beta_4 u$$. Computes theta-gradients via chain rule through the attached policy.

FixedRegressionObjective( policy: Policy, beta_1: numpy.ndarray, beta_2: float, beta_3: numpy.ndarray, beta_4: float)
policy: Policy
beta_1: numpy.ndarray
beta_2: float
beta_3: numpy.ndarray
beta_4: float
@classmethod
def from_parameters( cls, policy: Policy, beta_1: numpy.ndarray | Sequence[float], beta_2: float, beta_3: numpy.ndarray | Sequence[float], beta_4: float) -> FixedRegressionObjective:
50    @classmethod
51    def from_parameters(
52        cls,
53        policy: Policy,
54        beta_1: np.ndarray | Sequence[float],
55        beta_2: float,
56        beta_3: np.ndarray | Sequence[float],
57        beta_4: float,
58    ) -> "FixedRegressionObjective":
59        """Create objective from parameter values."""
60        return cls(
61            policy=policy,
62            beta_1=np.asarray(beta_1, dtype=float),
63            beta_2=float(beta_2),
64            beta_3=np.asarray(beta_3, dtype=float),
65            beta_4=float(beta_4),
66        )

Create objective from parameter values.

def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> float:
68    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
69        """Compute mean objective value across batch."""
70        x_arr = np.asarray(x_batch, dtype=float)
71        if x_arr.ndim != 2:
72            raise ValueError("x_batch must be a 2D array.")
73        u_batch = self.policy.value(theta, x_arr)
74        values = self._value_batch(x_arr, u_batch)
75        return float(np.mean(values))

Compute mean objective value across batch.

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
77    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
78        """Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta."""
79        x_arr = np.asarray(x_batch, dtype=float)
80        if x_arr.ndim != 2:
81            raise ValueError("x_batch must be a 2D array.")
82        theta_arr = np.asarray(theta, dtype=float)
83        u_batch = self.policy.value(theta_arr, x_arr)
84        grad_u = self._grad_u_batch(x_arr, u_batch)
85        return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)

Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta.

def value_at_u(self, x_batch: numpy.ndarray, u: float) -> float:
87    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
88        """Compute mean objective value at a fixed action u."""
89        x_arr = np.asarray(x_batch, dtype=float)
90        if x_arr.ndim != 2:
91            raise ValueError("x_batch must be a 2D array.")
92        u_arr = np.full(x_arr.shape[0], float(u), dtype=float)
93        values = self._value_batch(x_arr, u_arr)
94        return float(np.mean(values))

Compute mean objective value at a fixed action u.

@dataclass(frozen=True)
class ModelBasedObjective(Objective):
    """Pricing objective backed by trained ML models.

    $$f(u; x) = a(x,u) \\cdot (\\hat{Y}(x) - u \\cdot p(x))$$

    where $$a(x,u)$$ is the acceptance probability (sklearn Pipeline or XGBClassifier),
    $$\\hat{Y}(x)$$ is the expected financial loss (LinearRegression or XGBRegressor),
    and $$p(x)$$ is the policy premium extracted from column ``premium_col`` of x.

    ``acceptance_model`` expects a DataFrame with ``acceptance_state_cols + ["U"]``.
    ``loss_model`` expects a DataFrame with ``loss_cols``.

    If ``u_coef`` is provided, the analytical gradient
    $$da/dU = a(1-a) \\cdot u_{coef}$$ is used (GLM path).
    Otherwise numerical central finite differences are used (XGBoost path).
    """

    policy: Policy
    acceptance_model: Any
    loss_model: Any
    # Column names for model inference DataFrames
    acceptance_state_cols: tuple[str, ...]  # 10 state cols passed to acceptance model (no U)
    loss_cols: tuple[str, ...]              # 9 cols passed to loss model
    premium_col: int = 9                    # index of X_policy_premium in x_batch
    u_coef: float | None = None             # w_U / std_U for analytical GLM gradient
    _fd_eps: float = 1e-4                   # step size for numerical d_acceptance/dU

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Compute mean objective value across batch."""
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be 2D.")
        theta_arr = np.asarray(theta, dtype=float)
        u_batch = self.policy.value(theta_arr, x_arr)
        return float(np.mean(self._value_batch(x_arr, u_batch)))

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Compute theta-gradient via chain rule: df/dtheta = mean(df/du * du/dtheta)."""
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be 2D.")
        theta_arr = np.asarray(theta, dtype=float)
        u_batch = self.policy.value(theta_arr, x_arr)
        grad_u = self._grad_u_batch(x_arr, u_batch)
        return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)

    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
        """Compute mean objective value at a fixed action u."""
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be 2D.")
        u_arr = np.full(x_arr.shape[0], float(u), dtype=float)
        return float(np.mean(self._value_batch(x_arr, u_arr)))

    # --- Private helpers ---

    def _acceptance_proba(self, x_batch: np.ndarray, u_arr: np.ndarray) -> np.ndarray:
        """Call acceptance model on (x_batch state cols + u_arr). Returns shape (n,)."""
        # NOTE(review): assumes the leading columns of x_batch correspond, in
        # order, to acceptance_state_cols — confirm with the data-prep caller.
        x_state = x_batch[:, : len(self.acceptance_state_cols)]
        df = pd.DataFrame(
            np.column_stack([x_state, u_arr]),
            columns=list(self.acceptance_state_cols) + ["U"],
        )
        # Column 1 of predict_proba is the positive-class (accept) probability.
        return np.asarray(self.acceptance_model.predict_proba(df)[:, 1], dtype=float)

    def _loss_prediction(self, x_batch: np.ndarray) -> np.ndarray:
        """Call loss model on loss_cols subset of x_batch. Returns shape (n,)."""
        # NOTE(review): assumes the first len(loss_cols) columns of x_batch are
        # the loss-model features — verify the column ordering upstream.
        x_loss = x_batch[:, : len(self.loss_cols)]
        df = pd.DataFrame(x_loss, columns=list(self.loss_cols))
        return np.asarray(self.loss_model.predict(df), dtype=float)

    def _value_batch(self, x_batch: np.ndarray, u_arr: np.ndarray) -> np.ndarray:
        """Compute per-sample objective values."""
        acceptance = self._acceptance_proba(x_batch, u_arr)
        loss = self._loss_prediction(x_batch)
        premium = x_batch[:, self.premium_col]
        # Revenue is the action (relative price factor) times the base premium.
        revenue = u_arr * premium
        return acceptance * (loss - revenue)

    def _grad_u_batch(self, x_batch: np.ndarray, u_arr: np.ndarray) -> np.ndarray:
        """Compute df/du for each sample.

        GLM path (u_coef set): analytical d_acceptance/du = a(1-a) * u_coef.
        XGBoost path (u_coef is None): central FD on acceptance model.
        """
        acceptance = self._acceptance_proba(x_batch, u_arr)
        loss = self._loss_prediction(x_batch)
        premium = x_batch[:, self.premium_col]
        revenue = u_arr * premium

        if self.u_coef is not None:
            # Analytical: d_acceptance/dU = a(1-a) * u_coef
            d_acceptance_du = acceptance * (1.0 - acceptance) * self.u_coef
        else:
            # Numerical central FD: d_acceptance/dU ≈ (a(u+ε) - a(u-ε)) / (2ε)
            # Costs two extra model calls per gradient evaluation.
            eps = self._fd_eps
            a_plus = self._acceptance_proba(x_batch, u_arr + eps)
            a_minus = self._acceptance_proba(x_batch, u_arr - eps)
            d_acceptance_du = (a_plus - a_minus) / (2.0 * eps)

        # df/du = d_acceptance/du * (loss - revenue) - acceptance * premium
        return d_acceptance_du * (loss - revenue) - acceptance * premium

Pricing objective backed by trained ML models.

$$f(u; x) = a(x,u) \cdot (\hat{Y}(x) - u \cdot p(x))$$

where $$a(x,u)$$ is the acceptance probability (sklearn Pipeline or XGBClassifier), $$\hat{Y}(x)$$ is the expected financial loss (LinearRegression or XGBRegressor), and $$p(x)$$ is the policy premium extracted from column premium_col of x.

acceptance_model expects a DataFrame with acceptance_state_cols + ["U"]. loss_model expects a DataFrame with loss_cols.

If u_coef is provided, the analytical gradient $$da/dU = a(1-a) \cdot u_{coef}$$ is used (GLM path). Otherwise numerical central finite differences are used (XGBoost path).

ModelBasedObjective( policy: Policy, acceptance_model: Any, loss_model: Any, acceptance_state_cols: tuple[str, ...], loss_cols: tuple[str, ...], premium_col: int = 9, u_coef: float | None = None, _fd_eps: float = 0.0001)
policy: Policy
acceptance_model: Any
loss_model: Any
acceptance_state_cols: tuple[str, ...]
loss_cols: tuple[str, ...]
premium_col: int = 9
u_coef: float | None = None
def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> float:
44    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
45        """Compute mean objective value across batch."""
46        x_arr = np.asarray(x_batch, dtype=float)
47        if x_arr.ndim != 2:
48            raise ValueError("x_batch must be 2D.")
49        theta_arr = np.asarray(theta, dtype=float)
50        u_batch = self.policy.value(theta_arr, x_arr)
51        return float(np.mean(self._value_batch(x_arr, u_batch)))

Compute mean objective value across batch.

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
53    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
54        """Compute theta-gradient via chain rule: df/dtheta = mean(df/du * du/dtheta)."""
55        x_arr = np.asarray(x_batch, dtype=float)
56        if x_arr.ndim != 2:
57            raise ValueError("x_batch must be 2D.")
58        theta_arr = np.asarray(theta, dtype=float)
59        u_batch = self.policy.value(theta_arr, x_arr)
60        grad_u = self._grad_u_batch(x_arr, u_batch)
61        return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)

Compute theta-gradient via chain rule: df/dtheta = mean(df/du * du/dtheta).

def value_at_u(self, x_batch: numpy.ndarray, u: float) -> float:
63    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
64        """Compute mean objective value at a fixed action u."""
65        x_arr = np.asarray(x_batch, dtype=float)
66        if x_arr.ndim != 2:
67            raise ValueError("x_batch must be 2D.")
68        u_arr = np.full(x_arr.shape[0], float(u), dtype=float)
69        return float(np.mean(self._value_batch(x_arr, u_arr)))

Compute mean objective value at a fixed action u.

@dataclass(frozen=True)
class PlantedLogisticObjective(Objective):
    """Convex logistic objective with a planted optimum $$u^*$$ for validation.

    $$L(u; x) = \\log(1 + e^z) - p^*(x) z$$ where $$z = \\alpha u + \\beta^\\top x + b$$.
    """

    policy: Policy
    alpha: float
    beta: np.ndarray
    bias: float
    u_star: float

    def __post_init__(self) -> None:
        # Coerce parameter types; alpha = 0 would make every u optimal.
        coeff = float(self.alpha)
        weights = np.asarray(self.beta, dtype=float)
        offset = float(self.bias)
        planted = float(self.u_star)
        if coeff == 0.0:
            raise ValueError("alpha must be nonzero for a unique optimum.")
        # Frozen dataclass: write coerced values back via object.__setattr__.
        object.__setattr__(self, "alpha", coeff)
        object.__setattr__(self, "beta", weights)
        object.__setattr__(self, "bias", offset)
        object.__setattr__(self, "u_star", planted)

    @classmethod
    def from_parameters(
        cls,
        policy: Policy,
        alpha: float,
        beta: np.ndarray | Sequence[float],
        bias: float,
        u_star: float,
    ) -> "PlantedLogisticObjective":
        """Build an objective, coercing raw parameter values."""
        return cls(
            policy=policy,
            alpha=float(alpha),
            beta=np.asarray(beta, dtype=float),
            bias=float(bias),
            u_star=float(u_star),
        )

    def optimal_u(self) -> float:
        """Return the planted optimal action value."""
        return float(self.u_star)

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Return the mean objective value over the batch."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        actions = self.policy.value(theta, states)
        return float(np.mean(self._value_batch(states, actions)))

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the theta-gradient via chain rule: df/dtheta = df/du * du/dtheta."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        params = np.asarray(theta, dtype=float)
        actions = self.policy.value(params, states)
        du_grad = self._grad_u_batch(states, actions)
        return _theta_grad_from_u_grad(self.policy, params, states, du_grad)

    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
        """Return the mean objective value with every sample forced to action ``u``."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        fixed_actions = np.full(states.shape[0], float(u), dtype=float)
        return float(np.mean(self._value_batch(states, fixed_actions)))

    def _value_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Return per-sample losses $$\\log(1+e^z) - p^* z$$."""
        projection = x_array @ self.beta[: x_array.shape[1]]
        z = self.alpha * u_array + projection + self.bias
        target_logit = self.alpha * self.u_star + projection + self.bias
        target_prob = _sigmoid(target_logit)
        # logaddexp(0, z) is a numerically stable log(1 + exp(z)).
        return np.logaddexp(0.0, z) - target_prob * z

    def _grad_u_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Return per-sample derivatives $$\\alpha(\\sigma(z) - p^*)$$."""
        projection = x_array @ self.beta[: x_array.shape[1]]
        z = self.alpha * u_array + projection + self.bias
        target_logit = self.alpha * self.u_star + projection + self.bias
        return self.alpha * (_sigmoid(z) - _sigmoid(target_logit))

Convex logistic objective with known optimum $$u^*$$ for algorithm validation.

$$L(u; x) = \log(1 + e^z) - p^*(x) z$$ where $$z = \alpha u + \beta^\top x + b$$.

PlantedLogisticObjective( policy: Policy, alpha: float, beta: numpy.ndarray, bias: float, u_star: float)
policy: Policy
alpha: float
beta: numpy.ndarray
bias: float
u_star: float
@classmethod
def from_parameters( cls, policy: Policy, alpha: float, beta: numpy.ndarray | Sequence[float], bias: float, u_star: float) -> PlantedLogisticObjective:
41    @classmethod
42    def from_parameters(
43        cls,
44        policy: Policy,
45        alpha: float,
46        beta: np.ndarray | Sequence[float],
47        bias: float,
48        u_star: float,
49    ) -> "PlantedLogisticObjective":
50        """Create objective from parameter values."""
51        return cls(
52            policy=policy,
53            alpha=float(alpha),
54            beta=np.asarray(beta, dtype=float),
55            bias=float(bias),
56            u_star=float(u_star),
57        )

Create objective from parameter values.

def optimal_u(self) -> float:
59    def optimal_u(self) -> float:
60        """Return the planted optimal action value."""
61        return float(self.u_star)

Return the planted optimal action value.

def value(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> float:
63    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
64        """Compute mean objective value across batch."""
65        x_arr = np.asarray(x_batch, dtype=float)
66        if x_arr.ndim != 2:
67            raise ValueError("x_batch must be a 2D array.")
68        u_batch = self.policy.value(theta, x_arr)
69        values = self._value_batch(x_arr, u_batch)
70        return float(np.mean(values))

Compute mean objective value across batch.

def grad(self, theta: numpy.ndarray, x_batch: numpy.ndarray) -> numpy.ndarray:
72    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
73        """Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta."""
74        x_arr = np.asarray(x_batch, dtype=float)
75        if x_arr.ndim != 2:
76            raise ValueError("x_batch must be a 2D array.")
77        theta_arr = np.asarray(theta, dtype=float)
78        u_batch = self.policy.value(theta_arr, x_arr)
79        grad_u = self._grad_u_batch(x_arr, u_batch)
80        return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)

Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta.

def value_at_u(self, x_batch: numpy.ndarray, u: float) -> float:
82    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
83        """Compute mean objective value at a fixed action u."""
84        x_arr = np.asarray(x_batch, dtype=float)
85        if x_arr.ndim != 2:
86            raise ValueError("x_batch must be a 2D array.")
87        u_arr = np.full(x_arr.shape[0], float(u), dtype=float)
88        values = self._value_batch(x_arr, u_arr)
89        return float(np.mean(values))

Compute mean objective value at a fixed action u.

def optimal_u(objective: Objective) -> float | None:
49def optimal_u(objective: "Objective") -> float | None:
50    """Return optimal action u* if the objective exposes it.
51
52    For objectives with a known optimum (e.g., PlantedLogisticObjective),
53    this returns the optimal action value. New objectives can expose this
54    by implementing an `optimal_u() -> float` method.
55
56    Args:
57        objective: A theta-level objective.
58
59    Returns:
60        The optimal action value if available, otherwise None.
61    """
62    optimal_fn = getattr(objective, "optimal_u", None)
63    if callable(optimal_fn):
64        result = optimal_fn()
65        if result is not None:
66            return float(result)
67    u_star_attr = getattr(objective, "u_star", None)
68    if u_star_attr is not None:
69        return float(u_star_attr)
70    return None

Return optimal action u* if the objective exposes it.

For objectives with a known optimum (e.g., `PlantedLogisticObjective`), this returns the optimal action value. New objectives can expose this by implementing an `optimal_u() -> float` method.

Args: objective: A theta-level objective.

Returns: The optimal action value if available, otherwise None.