objective
Objective module public API.
This module provides:
- Base interfaces: Objective, Policy
- Sampling: sample_states, default_rng
- Concrete policies: ConstantPolicy, LinearPolicy, SoftmaxPolicy
- Concrete objectives: FixedRegressionObjective, PlantedLogisticObjective, ModelBasedObjective
- Utility: optimal_u
"""Objective module public API.

Exposes the package's stable surface:

- Base interfaces: Objective, Policy
- Sampling helpers: sample_states, default_rng
- Concrete policies: ConstantPolicy, LinearPolicy, SoftmaxPolicy
- Concrete objectives: FixedRegressionObjective, PlantedLogisticObjective,
  ModelBasedObjective
- Utility: optimal_u
"""

from objective.base import Objective, Policy, default_rng, sample_states
from objective.objectives import (
    FixedRegressionObjective,
    ModelBasedObjective,
    PlantedLogisticObjective,
)
from objective.policy import (
    ConstantPolicy,
    LinearPolicy,
    SoftmaxPolicy,
    policy_from_kind,
)
from objective.utils import optimal_u

__all__ = [
    # Base interfaces
    "Objective",
    "Policy",
    "default_rng",
    "sample_states",
    # Concrete policies
    "ConstantPolicy",
    "LinearPolicy",
    "SoftmaxPolicy",
    "policy_from_kind",
    # Concrete objectives
    "FixedRegressionObjective",
    "ModelBasedObjective",
    "PlantedLogisticObjective",
    # Utility
    "optimal_u",
]
class Objective:
    """Theta-space objective: $$J(\\theta) = \\mathbb{E}_x[f(\\pi_\\theta(x); x)]$$.

    Interface only: concrete subclasses implement both ``value`` and ``grad``,
    each taking policy parameters ``theta`` and a batch of states ``x_batch``.
    """

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Return the mean objective value for ``theta`` over ``x_batch``."""
        raise NotImplementedError

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the gradient with respect to theta for ``theta`` over ``x_batch``."""
        raise NotImplementedError
Theta-space objective: $$J(\theta) = \mathbb{E}_x[f(\pi_\theta(x); x)]$$.
class Policy:
    """Policy interface: $$u = \\pi_\\theta(x)$$ with gradient $$\\partial u / \\partial \\theta$$.

    Interface only: concrete subclasses implement ``value`` (actions) and
    ``grad`` (per-sample parameter gradients).
    """

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return action values for the batch, shape (n_samples,)."""
        raise NotImplementedError

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return per-sample policy gradients, shape (n_samples, theta_dim)."""
        raise NotImplementedError
Policy interface: $$u = \pi_\theta(x)$$ with gradient $$\partial u / \partial \theta$$.
26 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 27 """Return action values for batch, shape (n_samples,).""" 28 raise NotImplementedError
Return action values for batch, shape (n_samples,).
30 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 31 """Return policy gradients for batch, shape (n_samples, theta_dim).""" 32 raise NotImplementedError
Return policy gradients for batch, shape (n_samples, theta_dim).
def default_rng(seed: Optional[int] = None) -> np.random.Generator:
    """Build a NumPy ``Generator``; deterministic when ``seed`` is given."""
    generator = np.random.default_rng(seed)
    return generator
Return a NumPy random generator, optionally seeded.
def sample_states(rng: np.random.Generator, n: int, dim: int) -> np.ndarray:
    """Sample n state vectors from $$\\mathcal{N}(0, I)$$, shape (n, dim).

    Raises:
        ValueError: if ``n`` or ``dim`` is not strictly positive.
    """
    if not (n > 0 and dim > 0):
        raise ValueError("n and dim must be positive.")
    draws = rng.normal(0.0, 1.0, size=(n, dim))
    return draws.astype(float)
Sample n state vectors from $$\mathcal{N}(0, I)$$, shape (n, dim).
@dataclass(frozen=True)
class ConstantPolicy(Policy):
    """Constant policy: $$u = \\theta_0$$, ignores state $$x$$."""

    kind: str = _POLICY_CONSTANT

    @staticmethod
    def _checked_inputs(
        theta: np.ndarray, x_batch: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """Coerce inputs to float arrays and validate shapes.

        Shared by ``value`` and ``grad`` (previously duplicated in both).

        Returns:
            Tuple ``(theta_arr, x_arr)`` of float arrays.

        Raises:
            ValueError: if ``x_batch`` is not 2D or ``theta`` is empty.
        """
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        theta_arr = np.asarray(theta, dtype=float)
        if theta_arr.size < 1:
            raise ValueError("theta must have at least one element.")
        return theta_arr, x_arr

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return the constant action theta[0] for all samples, shape (n_samples,)."""
        theta_arr, x_arr = self._checked_inputs(theta, x_batch)
        return np.full(x_arr.shape[0], float(theta_arr[0]), dtype=float)

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return gradient [1, 0, ...] for all samples, shape (n_samples, theta_dim)."""
        theta_arr, x_arr = self._checked_inputs(theta, x_batch)
        grad = np.zeros((x_arr.shape[0], theta_arr.size), dtype=float)
        grad[:, 0] = 1.0  # only theta_0 influences the action
        return grad
Constant policy: $$u = \theta_0$$, ignores state $$x$$.
35 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 36 """Return constant action for all samples, shape (n_samples,).""" 37 x_arr = np.asarray(x_batch, dtype=float) 38 if x_arr.ndim != 2: 39 raise ValueError("x_batch must be a 2D array.") 40 theta_arr = np.asarray(theta, dtype=float) 41 if theta_arr.size < 1: 42 raise ValueError("theta must have at least one element.") 43 return np.full(x_arr.shape[0], float(theta_arr[0]), dtype=float)
Return constant action for all samples, shape (n_samples,).
45 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 46 """Return gradient [1, 0, ...] for all samples, shape (n_samples, theta_dim).""" 47 x_arr = np.asarray(x_batch, dtype=float) 48 if x_arr.ndim != 2: 49 raise ValueError("x_batch must be a 2D array.") 50 theta_arr = np.asarray(theta, dtype=float) 51 if theta_arr.size < 1: 52 raise ValueError("theta must have at least one element.") 53 n_samples = x_arr.shape[0] 54 grad = np.zeros((n_samples, theta_arr.size), dtype=float) 55 grad[:, 0] = 1.0 56 return grad
Return gradient [1, 0, ...] for all samples, shape (n_samples, theta_dim).
@dataclass(frozen=True)
class LinearPolicy(Policy):
    """Linear policy: $$u = \\theta^\\top \\phi(x)$$ where $$\\phi(x) = [1, x]$$."""

    kind: str = _POLICY_LINEAR

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return linear action values, shape (n_samples,)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        # Extra trailing theta entries are ignored; only the feature slice is used.
        return (phi @ params[:n_feat]).astype(float)

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return gradient phi(x) per sample, shape (n_samples, theta_dim)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        out = np.zeros((phi.shape[0], params.size), dtype=float)
        out[:, :n_feat] = phi  # unused theta entries get zero gradient
        return out
Linear policy: $$u = \theta^\top \phi(x)$$ where $$\phi(x) = [1, x]$$.
65 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 66 """Return linear action values, shape (n_samples,).""" 67 features = _phi(x_batch) 68 theta_arr = np.asarray(theta, dtype=float) 69 if theta_arr.size < features.shape[1]: 70 raise ValueError("theta must have at least state_dim + 1 elements.") 71 return (features @ theta_arr[: features.shape[1]]).astype(float)
Return linear action values, shape (n_samples,).
73 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 74 """Return gradient phi(x) for all samples, shape (n_samples, theta_dim).""" 75 features = _phi(x_batch) 76 theta_arr = np.asarray(theta, dtype=float) 77 if theta_arr.size < features.shape[1]: 78 raise ValueError("theta must have at least state_dim + 1 elements.") 79 n_samples = features.shape[0] 80 grad = np.zeros((n_samples, theta_arr.size), dtype=float) 81 grad[:, : features.shape[1]] = features 82 return grad
Return gradient phi(x) for all samples, shape (n_samples, theta_dim).
@dataclass(frozen=True)
class SoftmaxPolicy(Policy):
    """Softmax policy: $$u = 0.5 + \\sigma(\\theta^\\top \\phi(x)) \\in (0.5, 1.5)$$."""

    kind: str = _POLICY_SOFTMAX

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return softmax action values, shape (n_samples,)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        logits = phi @ params[:n_feat]
        return (0.5 + _sigmoid(logits)).astype(float)

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Return gradient sigma'(z) * phi(x) per sample, shape (n_samples, theta_dim)."""
        phi = _phi(x_batch)
        params = np.asarray(theta, dtype=float)
        n_feat = phi.shape[1]
        if params.size < n_feat:
            raise ValueError("theta must have at least state_dim + 1 elements.")
        sigma = _sigmoid(phi @ params[:n_feat])
        out = np.zeros((phi.shape[0], params.size), dtype=float)
        # du/dz = sigma(z) * (1 - sigma(z)); unused theta entries get zero gradient.
        out[:, :n_feat] = (sigma * (1.0 - sigma))[:, None] * phi
        return out
Softmax policy: $$u = 0.5 + \sigma(\theta^\top \phi(x)) \in (0.5, 1.5)$$.
91 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 92 """Return softmax action values, shape (n_samples,).""" 93 features = _phi(x_batch) 94 theta_arr = np.asarray(theta, dtype=float) 95 if theta_arr.size < features.shape[1]: 96 raise ValueError("theta must have at least state_dim + 1 elements.") 97 z = features @ theta_arr[: features.shape[1]] 98 return (0.5 + _sigmoid(z)).astype(float)
Return softmax action values, shape (n_samples,).
100 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 101 """Return gradient sigma'(z) * phi(x) for all samples, shape (n_samples, theta_dim).""" 102 features = _phi(x_batch) 103 theta_arr = np.asarray(theta, dtype=float) 104 if theta_arr.size < features.shape[1]: 105 raise ValueError("theta must have at least state_dim + 1 elements.") 106 z = features @ theta_arr[: features.shape[1]] 107 sigma = _sigmoid(z) 108 du_dz = sigma * (1.0 - sigma) 109 n_samples = features.shape[0] 110 grad = np.zeros((n_samples, theta_arr.size), dtype=float) 111 grad[:, : features.shape[1]] = du_dz[:, None] * features 112 return grad
Return gradient sigma'(z) * phi(x) for all samples, shape (n_samples, theta_dim).
def policy_from_kind(kind: str) -> ConstantPolicy | LinearPolicy | SoftmaxPolicy:
    """Create a policy instance from a kind string.

    Raises:
        ValueError: if ``kind`` is not a recognized policy kind.
    """
    factories = {
        _POLICY_CONSTANT: ConstantPolicy,
        _POLICY_LINEAR: LinearPolicy,
        _POLICY_SOFTMAX: SoftmaxPolicy,
    }
    factory = factories.get(kind)
    if factory is None:
        raise ValueError(f"Policy kind must be one of {_POLICY_KINDS}.")
    return factory()
Create a policy instance from a kind string.
@dataclass(frozen=True)
class FixedRegressionObjective(Objective):
    """Pricing objective: $$f(u; x) = a(x,u)(\\ell(x) - r(u))$$.

    Components: $$a = \\sigma(\\beta_1^\\top x + \\beta_2 u)$$,
    $$\\ell = \\beta_3^\\top x$$, $$r = \\beta_4 u$$.
    Theta-gradients are obtained via the chain rule through the attached policy.
    """

    policy: Policy
    beta_1: np.ndarray
    beta_2: float
    beta_3: np.ndarray
    beta_4: float

    def __post_init__(self) -> None:
        # Coerce parameters to canonical float form, then enforce sign constraints.
        b1 = np.asarray(self.beta_1, dtype=float)
        b2 = float(self.beta_2)
        b3 = np.asarray(self.beta_3, dtype=float)
        b4 = float(self.beta_4)
        if np.any(b1 <= 0.0):
            raise ValueError("beta_1 entries must be positive.")
        if b2 >= 0.0:
            raise ValueError(
                "beta_2 must be negative; acceptance probability should decrease as policy value increases."
            )
        if np.any(b3 <= 0.0):
            raise ValueError("beta_3 entries must be positive.")
        if b4 <= 0.0:
            raise ValueError("beta_4 must be positive.")
        # Frozen dataclass: write the coerced values back via object.__setattr__.
        object.__setattr__(self, "beta_1", b1)
        object.__setattr__(self, "beta_2", b2)
        object.__setattr__(self, "beta_3", b3)
        object.__setattr__(self, "beta_4", b4)

    @classmethod
    def from_parameters(
        cls,
        policy: Policy,
        beta_1: np.ndarray | Sequence[float],
        beta_2: float,
        beta_3: np.ndarray | Sequence[float],
        beta_4: float,
    ) -> "FixedRegressionObjective":
        """Create objective from parameter values."""
        return cls(
            policy=policy,
            beta_1=np.asarray(beta_1, dtype=float),
            beta_2=float(beta_2),
            beta_3=np.asarray(beta_3, dtype=float),
            beta_4=float(beta_4),
        )

    @staticmethod
    def _as_2d(x_batch: np.ndarray) -> np.ndarray:
        """Coerce ``x_batch`` to a float 2D array; raise ``ValueError`` otherwise."""
        arr = np.asarray(x_batch, dtype=float)
        if arr.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        return arr

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Compute mean objective value across batch."""
        states = self._as_2d(x_batch)
        actions = self.policy.value(theta, states)
        return float(np.mean(self._value_batch(states, actions)))

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta."""
        states = self._as_2d(x_batch)
        params = np.asarray(theta, dtype=float)
        actions = self.policy.value(params, states)
        df_du = self._grad_u_batch(states, actions)
        return _theta_grad_from_u_grad(self.policy, params, states, df_du)

    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
        """Compute mean objective value at a fixed action u."""
        states = self._as_2d(x_batch)
        actions = np.full(states.shape[0], float(u), dtype=float)
        return float(np.mean(self._value_batch(states, actions)))

    def _value_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Compute objective values for a batch of (x, u) pairs."""
        dim = x_array.shape[1]
        accept = _sigmoid(x_array @ self.beta_1[:dim] + self.beta_2 * u_array)
        expected_loss = x_array @ self.beta_3[:dim]
        return accept * (expected_loss - self.beta_4 * u_array)

    def _grad_u_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Compute gradient w.r.t. u for a batch of (x, u) pairs."""
        dim = x_array.shape[1]
        accept = _sigmoid(x_array @ self.beta_1[:dim] + self.beta_2 * u_array)
        margin = x_array @ self.beta_3[:dim] - self.beta_4 * u_array
        # Logistic derivative: sigma'(z) = sigma(z)(1 - sigma(z)), chained with beta_2.
        d_accept_du = accept * (1.0 - accept) * self.beta_2
        # Product rule: f = a * margin with dmargin/du = -beta_4.
        return d_accept_du * margin - accept * self.beta_4
Pricing objective: $$f(u; x) = a(x,u)(\ell(x) - r(u))$$.
Components: $$a = \sigma(\beta_1^\top x + \beta_2 u)$$, $$\ell = \beta_3^\top x$$, $$r = \beta_4 u$$. Computes theta-gradients via chain rule through the attached policy.
50 @classmethod 51 def from_parameters( 52 cls, 53 policy: Policy, 54 beta_1: np.ndarray | Sequence[float], 55 beta_2: float, 56 beta_3: np.ndarray | Sequence[float], 57 beta_4: float, 58 ) -> "FixedRegressionObjective": 59 """Create objective from parameter values.""" 60 return cls( 61 policy=policy, 62 beta_1=np.asarray(beta_1, dtype=float), 63 beta_2=float(beta_2), 64 beta_3=np.asarray(beta_3, dtype=float), 65 beta_4=float(beta_4), 66 )
Create objective from parameter values.
68 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float: 69 """Compute mean objective value across batch.""" 70 x_arr = np.asarray(x_batch, dtype=float) 71 if x_arr.ndim != 2: 72 raise ValueError("x_batch must be a 2D array.") 73 u_batch = self.policy.value(theta, x_arr) 74 values = self._value_batch(x_arr, u_batch) 75 return float(np.mean(values))
Compute mean objective value across batch.
77 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 78 """Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta.""" 79 x_arr = np.asarray(x_batch, dtype=float) 80 if x_arr.ndim != 2: 81 raise ValueError("x_batch must be a 2D array.") 82 theta_arr = np.asarray(theta, dtype=float) 83 u_batch = self.policy.value(theta_arr, x_arr) 84 grad_u = self._grad_u_batch(x_arr, u_batch) 85 return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)
Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta.
87 def value_at_u(self, x_batch: np.ndarray, u: float) -> float: 88 """Compute mean objective value at a fixed action u.""" 89 x_arr = np.asarray(x_batch, dtype=float) 90 if x_arr.ndim != 2: 91 raise ValueError("x_batch must be a 2D array.") 92 u_arr = np.full(x_arr.shape[0], float(u), dtype=float) 93 values = self._value_batch(x_arr, u_arr) 94 return float(np.mean(values))
Compute mean objective value at a fixed action u.
@dataclass(frozen=True)
class ModelBasedObjective(Objective):
    """Pricing objective backed by trained ML models.

    $$f(u; x) = a(x,u) \\cdot (\\hat{Y}(x) - u \\cdot p(x))$$

    where $$a(x,u)$$ is the acceptance probability (sklearn Pipeline or XGBClassifier),
    $$\\hat{Y}(x)$$ is the expected financial loss (LinearRegression or XGBRegressor),
    and $$p(x)$$ is the policy premium extracted from column ``premium_col`` of x.

    ``acceptance_model`` expects a DataFrame with ``acceptance_state_cols + ["U"]``.
    ``loss_model`` expects a DataFrame with ``loss_cols``.

    If ``u_coef`` is provided, the analytical gradient
    $$da/dU = a(1-a) \\cdot u_{coef}$$ is used (GLM path).
    Otherwise numerical central finite differences are used (XGBoost path).
    """

    policy: Policy
    acceptance_model: Any  # must expose predict_proba(df) -> (n, 2) class probabilities
    loss_model: Any  # must expose predict(df) -> (n,) loss predictions
    # Column names for model inference DataFrames
    acceptance_state_cols: tuple[str, ...]  # 10 state cols passed to acceptance model (no U)
    loss_cols: tuple[str, ...]  # 9 cols passed to loss model
    premium_col: int = 9  # index of X_policy_premium in x_batch
    u_coef: float | None = None  # w_U / std_U for analytical GLM gradient
    _fd_eps: float = 1e-4  # step size for numerical d_acceptance/dU

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Compute mean objective value across batch."""
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be 2D.")
        theta_arr = np.asarray(theta, dtype=float)
        u_batch = self.policy.value(theta_arr, x_arr)
        return float(np.mean(self._value_batch(x_arr, u_batch)))

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Compute theta-gradient via chain rule: df/dtheta = mean(df/du * du/dtheta)."""
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be 2D.")
        theta_arr = np.asarray(theta, dtype=float)
        u_batch = self.policy.value(theta_arr, x_arr)
        grad_u = self._grad_u_batch(x_arr, u_batch)
        return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)

    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
        """Compute mean objective value at a fixed action u."""
        x_arr = np.asarray(x_batch, dtype=float)
        if x_arr.ndim != 2:
            raise ValueError("x_batch must be 2D.")
        u_arr = np.full(x_arr.shape[0], float(u), dtype=float)
        return float(np.mean(self._value_batch(x_arr, u_arr)))

    # --- Private helpers ---

    def _acceptance_proba(self, x_batch: np.ndarray, u_arr: np.ndarray) -> np.ndarray:
        """Call acceptance model on (x_batch state cols + u_arr). Returns shape (n,).

        NOTE(review): assumes the first len(acceptance_state_cols) columns of
        x_batch are the state features in the model's training order — confirm
        against the feature pipeline.
        """
        x_state = x_batch[:, : len(self.acceptance_state_cols)]
        df = pd.DataFrame(
            np.column_stack([x_state, u_arr]),
            columns=list(self.acceptance_state_cols) + ["U"],
        )
        # predict_proba column 1 is taken as P(accept).
        return np.asarray(self.acceptance_model.predict_proba(df)[:, 1], dtype=float)

    def _loss_prediction(self, x_batch: np.ndarray) -> np.ndarray:
        """Call loss model on loss_cols subset of x_batch. Returns shape (n,).

        NOTE(review): likewise assumes the first len(loss_cols) columns of
        x_batch match the loss model's training columns — confirm.
        """
        x_loss = x_batch[:, : len(self.loss_cols)]
        df = pd.DataFrame(x_loss, columns=list(self.loss_cols))
        return np.asarray(self.loss_model.predict(df), dtype=float)

    def _value_batch(self, x_batch: np.ndarray, u_arr: np.ndarray) -> np.ndarray:
        """Compute per-sample objective values a * (loss - u * premium)."""
        acceptance = self._acceptance_proba(x_batch, u_arr)
        loss = self._loss_prediction(x_batch)
        premium = x_batch[:, self.premium_col]
        revenue = u_arr * premium
        return acceptance * (loss - revenue)

    def _grad_u_batch(self, x_batch: np.ndarray, u_arr: np.ndarray) -> np.ndarray:
        """Compute df/du for each sample.

        GLM path (u_coef set): analytical d_acceptance/du = a(1-a) * u_coef.
        XGBoost path (u_coef is None): central FD on acceptance model.
        """
        acceptance = self._acceptance_proba(x_batch, u_arr)
        loss = self._loss_prediction(x_batch)
        premium = x_batch[:, self.premium_col]
        revenue = u_arr * premium

        if self.u_coef is not None:
            # Analytical: d_acceptance/dU = a(1-a) * u_coef
            d_acceptance_du = acceptance * (1.0 - acceptance) * self.u_coef
        else:
            # Numerical central FD: d_acceptance/dU ≈ (a(u+ε) - a(u-ε)) / (2ε)
            # (costs two extra model calls per gradient evaluation)
            eps = self._fd_eps
            a_plus = self._acceptance_proba(x_batch, u_arr + eps)
            a_minus = self._acceptance_proba(x_batch, u_arr - eps)
            d_acceptance_du = (a_plus - a_minus) / (2.0 * eps)

        # Product rule: df/du = d_acceptance/du * (loss - revenue) - acceptance * premium
        return d_acceptance_du * (loss - revenue) - acceptance * premium
Pricing objective backed by trained ML models.
$$f(u; x) = a(x,u) \cdot (\hat{Y}(x) - u \cdot p(x))$$
where $$a(x,u)$$ is the acceptance probability (sklearn Pipeline or XGBClassifier),
$$\hat{Y}(x)$$ is the expected financial loss (LinearRegression or XGBRegressor),
and $$p(x)$$ is the policy premium extracted from column premium_col of x.
acceptance_model expects a DataFrame with acceptance_state_cols + ["U"].
loss_model expects a DataFrame with loss_cols.
If u_coef is provided, the analytical gradient
$$da/dU = a(1-a) \cdot u_{coef}$$ is used (GLM path).
Otherwise numerical central finite differences are used (XGBoost path).
44 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float: 45 """Compute mean objective value across batch.""" 46 x_arr = np.asarray(x_batch, dtype=float) 47 if x_arr.ndim != 2: 48 raise ValueError("x_batch must be 2D.") 49 theta_arr = np.asarray(theta, dtype=float) 50 u_batch = self.policy.value(theta_arr, x_arr) 51 return float(np.mean(self._value_batch(x_arr, u_batch)))
Compute mean objective value across batch.
53 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 54 """Compute theta-gradient via chain rule: df/dtheta = mean(df/du * du/dtheta).""" 55 x_arr = np.asarray(x_batch, dtype=float) 56 if x_arr.ndim != 2: 57 raise ValueError("x_batch must be 2D.") 58 theta_arr = np.asarray(theta, dtype=float) 59 u_batch = self.policy.value(theta_arr, x_arr) 60 grad_u = self._grad_u_batch(x_arr, u_batch) 61 return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)
Compute theta-gradient via chain rule: df/dtheta = mean(df/du * du/dtheta).
63 def value_at_u(self, x_batch: np.ndarray, u: float) -> float: 64 """Compute mean objective value at a fixed action u.""" 65 x_arr = np.asarray(x_batch, dtype=float) 66 if x_arr.ndim != 2: 67 raise ValueError("x_batch must be 2D.") 68 u_arr = np.full(x_arr.shape[0], float(u), dtype=float) 69 return float(np.mean(self._value_batch(x_arr, u_arr)))
Compute mean objective value at a fixed action u.
@dataclass(frozen=True)
class PlantedLogisticObjective(Objective):
    """Convex logistic objective with known optimum $$u^*$$ for algorithm validation.

    $$L(u; x) = \\log(1 + e^z) - p^*(x) z$$ where $$z = \\alpha u + \\beta^\\top x + b$$.
    """

    policy: Policy
    alpha: float
    beta: np.ndarray
    bias: float
    u_star: float

    def __post_init__(self) -> None:
        # Coerce first (so bad inputs raise early), then validate alpha.
        coeff = float(self.alpha)
        weights = np.asarray(self.beta, dtype=float)
        offset = float(self.bias)
        target = float(self.u_star)
        if coeff == 0.0:
            raise ValueError("alpha must be nonzero for a unique optimum.")
        # Frozen dataclass: write the coerced values back via object.__setattr__.
        object.__setattr__(self, "alpha", coeff)
        object.__setattr__(self, "beta", weights)
        object.__setattr__(self, "bias", offset)
        object.__setattr__(self, "u_star", target)

    @classmethod
    def from_parameters(
        cls,
        policy: Policy,
        alpha: float,
        beta: np.ndarray | Sequence[float],
        bias: float,
        u_star: float,
    ) -> "PlantedLogisticObjective":
        """Create objective from parameter values."""
        return cls(
            policy=policy,
            alpha=float(alpha),
            beta=np.asarray(beta, dtype=float),
            bias=float(bias),
            u_star=float(u_star),
        )

    def optimal_u(self) -> float:
        """Return the planted optimal action value."""
        return float(self.u_star)

    def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float:
        """Compute mean objective value across batch."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        actions = self.policy.value(theta, states)
        return float(np.mean(self._value_batch(states, actions)))

    def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray:
        """Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        params = np.asarray(theta, dtype=float)
        actions = self.policy.value(params, states)
        df_du = self._grad_u_batch(states, actions)
        return _theta_grad_from_u_grad(self.policy, params, states, df_du)

    def value_at_u(self, x_batch: np.ndarray, u: float) -> float:
        """Compute mean objective value at a fixed action u."""
        states = np.asarray(x_batch, dtype=float)
        if states.ndim != 2:
            raise ValueError("x_batch must be a 2D array.")
        actions = np.full(states.shape[0], float(u), dtype=float)
        return float(np.mean(self._value_batch(states, actions)))

    def _logits(
        self, x_array: np.ndarray, u_array: np.ndarray
    ) -> tuple[np.ndarray, np.ndarray]:
        """Return (z, z_star): logits at the given actions and at u_star."""
        proj = x_array @ self.beta[: x_array.shape[1]]
        z = self.alpha * u_array + proj + self.bias
        z_star = self.alpha * self.u_star + proj + self.bias
        return z, z_star

    def _value_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Compute objective values for a batch of (x, u) pairs."""
        z, z_star = self._logits(x_array, u_array)
        # logaddexp(0, z) = log(1 + e^z), numerically stable softplus.
        return np.logaddexp(0.0, z) - _sigmoid(z_star) * z

    def _grad_u_batch(self, x_array: np.ndarray, u_array: np.ndarray) -> np.ndarray:
        """Compute gradient w.r.t. u for a batch of (x, u) pairs."""
        z, z_star = self._logits(x_array, u_array)
        # dL/du = alpha * (sigma(z) - p*); zero exactly at u = u_star.
        return self.alpha * (_sigmoid(z) - _sigmoid(z_star))
Convex logistic objective with known optimum $$u^*$$ for algorithm validation.
$$L(u; x) = \log(1 + e^z) - p^*(x) z$$ where $$z = \alpha u + \beta^\top x + b$$.
41 @classmethod 42 def from_parameters( 43 cls, 44 policy: Policy, 45 alpha: float, 46 beta: np.ndarray | Sequence[float], 47 bias: float, 48 u_star: float, 49 ) -> "PlantedLogisticObjective": 50 """Create objective from parameter values.""" 51 return cls( 52 policy=policy, 53 alpha=float(alpha), 54 beta=np.asarray(beta, dtype=float), 55 bias=float(bias), 56 u_star=float(u_star), 57 )
Create objective from parameter values.
59 def optimal_u(self) -> float: 60 """Return the planted optimal action value.""" 61 return float(self.u_star)
Return the planted optimal action value.
63 def value(self, theta: np.ndarray, x_batch: np.ndarray) -> float: 64 """Compute mean objective value across batch.""" 65 x_arr = np.asarray(x_batch, dtype=float) 66 if x_arr.ndim != 2: 67 raise ValueError("x_batch must be a 2D array.") 68 u_batch = self.policy.value(theta, x_arr) 69 values = self._value_batch(x_arr, u_batch) 70 return float(np.mean(values))
Compute mean objective value across batch.
72 def grad(self, theta: np.ndarray, x_batch: np.ndarray) -> np.ndarray: 73 """Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta.""" 74 x_arr = np.asarray(x_batch, dtype=float) 75 if x_arr.ndim != 2: 76 raise ValueError("x_batch must be a 2D array.") 77 theta_arr = np.asarray(theta, dtype=float) 78 u_batch = self.policy.value(theta_arr, x_arr) 79 grad_u = self._grad_u_batch(x_arr, u_batch) 80 return _theta_grad_from_u_grad(self.policy, theta_arr, x_arr, grad_u)
Compute theta-gradient via chain rule: df/dtheta = df/du * du/dtheta.
82 def value_at_u(self, x_batch: np.ndarray, u: float) -> float: 83 """Compute mean objective value at a fixed action u.""" 84 x_arr = np.asarray(x_batch, dtype=float) 85 if x_arr.ndim != 2: 86 raise ValueError("x_batch must be a 2D array.") 87 u_arr = np.full(x_arr.shape[0], float(u), dtype=float) 88 values = self._value_batch(x_arr, u_arr) 89 return float(np.mean(values))
Compute mean objective value at a fixed action u.
49def optimal_u(objective: "Objective") -> float | None: 50 """Return optimal action u* if the objective exposes it. 51 52 For objectives with a known optimum (e.g., PlantedLogisticObjective), 53 this returns the optimal action value. New objectives can expose this 54 by implementing an `optimal_u() -> float` method. 55 56 Args: 57 objective: A theta-level objective. 58 59 Returns: 60 The optimal action value if available, otherwise None. 61 """ 62 optimal_fn = getattr(objective, "optimal_u", None) 63 if callable(optimal_fn): 64 result = optimal_fn() 65 if result is not None: 66 return float(result) 67 u_star_attr = getattr(objective, "u_star", None) 68 if u_star_attr is not None: 69 return float(u_star_attr) 70 return None
Return optimal action u* if the objective exposes it.
For objectives with a known optimum (e.g., PlantedLogisticObjective),
this returns the optimal action value. New objectives can expose this
by implementing an optimal_u() -> float method.
Args: objective: A theta-level objective.
Returns: The optimal action value if available, otherwise None.