Files
codex-controller-loop/src/model/policy.rs
eric 4a5c1d3c4b
Some checks failed
distribution-gate / distribution-gate (push) Failing after 1m57s
feat: different modules per task
2026-04-04 19:29:44 +02:00

782 lines
25 KiB
Rust

use anyhow::{bail, Context, Result};
use serde::{de::DeserializeOwned, Deserialize, Serialize};
use serde_json::Value;
use toon_format::decode_default;
/// Top-level routing configuration decoded from a TOON policy document.
///
/// Sections that are absent from the input are filled from this type's
/// `Default` implementation (container-level `#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct RoutingPolicy {
/// Model name assigned to each role, plus the policy confidence threshold.
pub policy: ModelRolePolicy,
/// Free-form `pattern => model` decision rules consulted by `parse_route`.
pub routing: RoutingRules,
/// Validation gate applied to every handoff before it is routed.
pub gate: PolicyGate,
}
/// Model name assigned to each routing role, plus the minimum confidence.
///
/// Fields serialize with kebab-case names. Multi-word fields also accept their
/// snake_case spelling on input, because the policy documents exercised by this
/// module's tests use `escalate_to` / `conf_threshold`; without the aliases
/// those keys are silently ignored and the defaults win.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct ModelRolePolicy {
    /// Model chosen for router-type scopes and as the final fallback.
    pub router: String,
    /// Model chosen for information-gathering scopes (also accepted as `research`).
    #[serde(alias = "research")]
    pub info: String,
    /// Model chosen for implementation scopes (also accepted as `builder`).
    #[serde(alias = "builder")]
    pub build: String,
    /// Model that receives low-confidence, risky or ambiguous handoffs.
    #[serde(alias = "escalate_to")]
    pub escalate_to: String,
    /// Policy-wide confidence threshold used when computing the escalation cutoff.
    #[serde(alias = "conf_threshold")]
    pub conf_threshold: f64,
}
/// Ordered list of textual routing rules.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(default)]
pub struct RoutingRules {
/// Raw rules of the form `pattern => model`; entries that `parse_rule`
/// cannot parse are skipped during routing.
pub decision_rules: Vec<String>,
}
/// Validation gate applied to a handoff before routing.
///
/// Fields serialize with kebab-case names; the snake_case spellings used by the
/// policy documents in this module's tests are accepted as input aliases so
/// they are no longer silently dropped.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct PolicyGate {
    /// Names of handoff fields that must be present and non-empty.
    #[serde(alias = "require_fields")]
    pub require_fields: Vec<String>,
    /// Lower bound for handoff confidence; feeds the escalation cutoff in `parse_route`.
    #[serde(alias = "fail_if_conf_less")]
    pub fail_if_conf_less: f64,
    /// Action label for conflicting or missing data (not interpreted in this file).
    #[serde(alias = "if_conflict_or_missing")]
    pub if_conflict_or_missing: String,
    /// Action label for test failures (not interpreted in this file).
    #[serde(alias = "if_test_failure")]
    pub if_test_failure: String,
}
/// Result of routing a handoff: the selected model and why it was selected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PolicyDecision {
/// Name of the model the handoff should be sent to.
pub model: String,
/// Human-readable explanation of the routing branch that produced `model`.
pub reason: String,
}
/// A task handoff between models, decoded from TOON.
///
/// `Default` is derived because container-level `#[serde(default)]` requires
/// `Self: Default`. Fields serialize with kebab-case names; every multi-word
/// field also accepts its snake_case spelling on input, which is the spelling
/// used by the envelopes in this module's tests. The short aliases (`id`,
/// `src`, `dst`, `obj`, `refs`, ...) are kept for earlier envelope versions.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct HandoffEnvelope {
    /// Task identifier, either a bare string or a `{ value: ... }` cell.
    #[serde(alias = "task_id", alias = "id")]
    pub task_id: Option<StringCell>,
    /// Model that produced the handoff.
    #[serde(alias = "from_model", alias = "from", alias = "src")]
    pub from_model: Option<StringCell>,
    /// Requested destination model.
    #[serde(alias = "to_model", alias = "to", alias = "dst")]
    pub to_model: Option<StringCell>,
    /// What the receiving model is asked to achieve.
    #[serde(alias = "obj")]
    pub objective: Option<String>,
    /// Scope label used for scope-based routing.
    pub scope: Option<String>,
    /// Constraints the receiver must respect.
    pub constraints: Option<Vec<String>>,
    /// Description of the current state of the work.
    #[serde(alias = "current_state")]
    pub current_state: Option<String>,
    /// References to artifacts relevant to the task.
    #[serde(alias = "artifact_refs", alias = "refs")]
    pub artifact_refs: Option<Vec<String>>,
    /// Free-form description of the expected output.
    #[serde(alias = "expected_output")]
    pub expected_output: Option<Value>,
    /// Criteria the result must satisfy.
    #[serde(alias = "acceptance_criteria")]
    pub acceptance_criteria: Option<Vec<String>>,
    /// Known risks; scanned for security/reliability signals.
    pub risks: Option<Vec<String>>,
    /// Conditions that should trigger a fallback; scanned for risk and ambiguity signals.
    #[serde(alias = "fallback_triggers")]
    pub fallback_triggers: Option<Vec<String>>,
    /// Per-handoff confidence threshold; can only raise the effective cutoff.
    #[serde(alias = "confidence_threshold")]
    pub confidence_threshold: Option<f64>,
    /// Sender's confidence, expected in `0.0..=1.0`.
    #[serde(alias = "confidence")]
    pub conf: Option<f64>,
    /// Assumptions the sender made.
    #[serde(alias = "assumptions_made")]
    pub assumptions_made: Option<Vec<String>>,
    /// Questions that remain open.
    #[serde(alias = "open_questions")]
    pub open_questions: Option<Vec<String>>,
    /// Checks that failed; scanned for contradiction signals.
    #[serde(alias = "failed_checks")]
    pub failed_checks: Option<Vec<String>>,
    /// Legacy context section (state and refs).
    #[serde(default)]
    pub context: HandoffContext,
    /// Legacy output-expectation section.
    #[serde(default)]
    pub out: HandoffOut,
    /// Legacy criteria section.
    #[serde(default)]
    pub criteria: HandoffCriteria,
}
/// A string that may be encoded either directly or wrapped in a single-field
/// map. `#[serde(untagged)]` makes the decoder try the variants in order.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringCell {
/// Plain string form, e.g. `from_model: "gpt-5.4"`.
Value(String),
/// Map form carrying the text under a `value` key.
Cell { value: String },
}
/// Optional context section of a handoff; both fields default to empty.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case", default)]
pub struct HandoffContext {
/// State description; accepted as a substitute for `current_state`
/// when required fields are validated.
#[serde(default)]
pub state: Option<String>,
/// Artifact references; accepted as a substitute for `artifact_refs`
/// when required fields are validated.
#[serde(default)]
pub refs: Vec<String>,
}
/// Output expectations attached to a handoff. Every field is optional on
/// input. Because of `rename_all = "kebab-case"`, multi-word fields use
/// kebab-case wire names (`cite-sources`, `diff-summary`, `open-questions`).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case", default)]
pub struct HandoffOut {
/// Number of alternatives requested, if any.
#[serde(default)]
pub alternatives: Option<u32>,
/// Whether a recommended choice is requested.
#[serde(default)]
pub recommended: bool,
/// Whether sources must be cited.
#[serde(default)]
pub cite_sources: bool,
/// Requested output format, if any.
#[serde(default)]
pub format: Option<String>,
/// Whether a diff summary is requested; only `Some(true)` counts as output.
#[serde(default)]
pub diff_summary: Option<bool>,
/// Files named in the output section.
#[serde(default)]
pub files: Vec<String>,
/// Tests named in the output section.
#[serde(default)]
pub tests: Vec<String>,
/// Free-form risk notes.
#[serde(default)]
pub risks: Option<String>,
/// Free-form open questions.
#[serde(default)]
pub open_questions: Option<String>,
}
/// Criteria section of a handoff, grouped into three lists.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
#[serde(rename_all = "kebab-case", default)]
pub struct HandoffCriteria {
/// `must` entries; counted by `has_requirements`.
#[serde(default)]
pub must: Vec<String>,
/// `fail` entries; counted by `has_requirements`.
#[serde(default)]
pub fail: Vec<String>,
/// `optional` entries; not counted by `has_requirements`.
#[serde(default)]
pub optional: Vec<String>,
}
/// A review message sent from one model to another.
///
/// `Default` is derived because container-level `#[serde(default)]` requires
/// `Self: Default`; it relies on `StringCell` and `ReviewPayload` being `Default`.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct ReviewEnvelope {
    /// Reviewing model.
    pub from: StringCell,
    /// Model receiving the review.
    pub to: StringCell,
    /// Review content.
    pub review: ReviewPayload,
}

/// Body of a review; every field is optional on input.
///
/// `Default` is derived for the same reason as on [`ReviewEnvelope`].
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case", default)]
pub struct ReviewPayload {
    /// Identifier of the reviewed task.
    #[serde(default)]
    pub task: Option<String>,
    /// Reviewer confidence.
    #[serde(default)]
    pub conf: Option<f64>,
    /// Findings reported by the reviewer.
    #[serde(default)]
    pub findings: Vec<String>,
    /// Items the reviewer requires.
    #[serde(default)]
    pub required: Vec<String>,
    /// Gate label supplied by the reviewer.
    #[serde(default)]
    pub gate: Option<String>,
}
impl Default for RoutingPolicy {
fn default() -> Self {
Self {
policy: ModelRolePolicy::default(),
routing: RoutingRules::default(),
gate: PolicyGate {
require_fields: vec![
"task_id".to_string(),
"from_model".to_string(),
"to_model".to_string(),
"objective".to_string(),
"scope".to_string(),
"constraints".to_string(),
"current_state".to_string(),
"artifact_refs".to_string(),
"expected_output".to_string(),
"acceptance_criteria".to_string(),
"risks".to_string(),
"fallback_triggers".to_string(),
"conf".to_string(),
"assumptions_made".to_string(),
"open_questions".to_string(),
"failed_checks".to_string(),
],
fail_if_conf_less: 0.85,
if_conflict_or_missing: "reroute_to_router".to_string(),
if_test_failure: "reroute_to_builder_once_then_router".to_string(),
},
}
}
}
impl Default for ModelRolePolicy {
fn default() -> Self {
Self {
router: "gpt-5.4".to_string(),
info: "gpt-5.4-mini".to_string(),
build: "gpt-5.3-codex-spark".to_string(),
escalate_to: "gpt-5.4".to_string(),
conf_threshold: 0.85,
}
}
}
impl Default for StringCell {
fn default() -> Self {
Self::Value(String::new())
}
}
impl StringCell {
    /// Borrows the wrapped text, whichever shape it was decoded from.
    pub fn as_str(&self) -> &str {
        let (Self::Value(text) | Self::Cell { value: text }) = self;
        text
    }

    /// Returns `true` when the text contains at least one non-whitespace character.
    pub fn is_non_empty(&self) -> bool {
        self.as_str().chars().any(|ch| !ch.is_whitespace())
    }
}
impl RoutingPolicy {
    /// Decodes a routing policy from TOON text.
    ///
    /// # Errors
    /// Returns an error when the payload cannot be decoded.
    pub fn from_toon(content: &str) -> Result<Self> {
        parse_toon(content)
    }

    /// Chooses the model that should receive `handoff`.
    ///
    /// Decision order:
    /// 1. Validate the handoff against the gate.
    /// 2. Escalate when confidence is below the effective threshold, i.e. the
    ///    maximum of the gate threshold, the policy threshold and the handoff's
    ///    own threshold.
    /// 3. Escalate on security/reliability or contradiction/ambiguity signals.
    /// 4. Honor the requested destination when it names a configured model and
    ///    the scope or objective carries a known routing hint.
    /// 5. Otherwise route by hint (router, then info, then builder), then by
    ///    the first matching decision rule, then fall back to the router.
    ///
    /// # Errors
    /// Propagates validation failures from
    /// [`HandoffEnvelope::validate_required_fields`].
    pub fn parse_route(&self, handoff: &HandoffEnvelope) -> Result<PolicyDecision> {
        handoff.validate_required_fields(self)?;

        let requested_threshold = handoff
            .confidence_threshold
            .unwrap_or(self.policy.conf_threshold);
        let fail_threshold = self
            .gate
            .fail_if_conf_less
            .max(requested_threshold)
            .max(self.policy.conf_threshold);
        // A handoff without a confidence value is treated as fully confident;
        // list `conf` in `require_fields` (the default) to forbid that.
        let conf = handoff.conf.unwrap_or(1.0);
        if conf < fail_threshold {
            return Ok(self.escalation("confidence below configured threshold"));
        }
        if handoff.is_security_or_reliability_sensitive()
            || handoff.is_contradictory_or_ambiguous()
        {
            return Ok(self.escalation(
                "risk/contradiction or ambiguous classification requires router",
            ));
        }

        let router_hint = handoff.is_router_scope_hint();
        let info_hint = handoff.is_info_scope_hint();
        let builder_hint = handoff.is_builder_scope_hint();
        let any_hint = router_hint || info_hint || builder_hint;

        // Return the trimmed name: `is_allowed_model` compares trimmed text, so
        // returning the raw cell could leak surrounding whitespace to callers.
        let honored = handoff
            .to_model
            .as_ref()
            .map(|cell| cell.as_str().trim())
            .filter(|candidate| any_hint && is_allowed_model(self, candidate));
        if let Some(model) = honored {
            return Ok(PolicyDecision {
                model: model.to_string(),
                reason: "handoff destination honored".to_string(),
            });
        }

        // The reason is recorded by the branch that picked the model instead
        // of being re-derived from the model name, which mislabels results
        // when two roles share a model or when nothing matched at all.
        let (model, reason) = if router_hint {
            (
                self.policy.router.clone(),
                "matched architectural/decision scope".to_string(),
            )
        } else if info_hint {
            (
                self.policy.info.clone(),
                "matched information-gathering scope".to_string(),
            )
        } else if builder_hint {
            (
                self.policy.build.clone(),
                "matched implementation/debugging scope".to_string(),
            )
        } else {
            let rule_model = self.routing.decision_rules.iter().find_map(|raw_rule| {
                parse_rule(raw_rule)
                    .filter(|rule| rule.matches(handoff))
                    .map(|rule| rule.model)
            });
            match rule_model {
                Some(model) => (
                    model,
                    format!(
                        "matched decision rule {}",
                        handoff.scope.as_deref().unwrap_or("unknown")
                    ),
                ),
                None => (
                    self.policy.router.clone(),
                    "no scope or decision rule matched; defaulting to router".to_string(),
                ),
            }
        };
        Ok(PolicyDecision { model, reason })
    }

    /// Builds a decision that sends the handoff to the escalation model.
    fn escalation(&self, reason: &str) -> PolicyDecision {
        PolicyDecision {
            model: self.policy.escalate_to.clone(),
            reason: reason.to_string(),
        }
    }
}
impl HandoffEnvelope {
pub fn from_toon(content: &str) -> Result<Self> {
parse_toon(content)
}
pub fn validate_required_fields(&self, policy: &RoutingPolicy) -> Result<()> {
let mut missing = Vec::new();
for field in &policy.gate.require_fields {
if !self.has_required_field(field) {
missing.push(field.to_string());
}
}
if !missing.is_empty() {
missing.sort();
missing.dedup();
bail!("missing required handoff fields: {}", missing.join(", "));
}
if !self.has_required_contract() {
bail!(
"handoff must include acceptance_criteria and verification fields (assumptions_made/open_questions/failed_checks)"
);
}
if let Some(conf) = self.conf
&& !(0.0..=1.0).contains(&conf)
{
bail!("confidence value must be normalized in 0.0..1.0, got {conf}");
}
Ok(())
}
fn has_required_field(&self, field: &str) -> bool {
match field {
"task_id" => self.task_id.as_ref().is_some_and(StringCell::is_non_empty),
"from_model" => self.from_model.as_ref().is_some_and(StringCell::is_non_empty),
"to_model" => self.to_model.as_ref().is_some_and(StringCell::is_non_empty),
"objective" => self
.objective
.as_ref()
.is_some_and(|value| !value.trim().is_empty()),
"scope" => self
.scope
.as_ref()
.is_some_and(|value| !value.trim().is_empty()),
"constraints" => self.constraints.as_ref().is_some_and(|values| !values.is_empty()),
"current_state" => self
.current_state
.as_ref()
.is_some_and(|value| !value.trim().is_empty())
|| self.context.state.as_ref().is_some_and(|value| !value.trim().is_empty()),
"artifact_refs" => self
.artifact_refs
.as_ref()
.is_some_and(|values| !values.is_empty())
|| !self.context.refs.is_empty(),
"expected_output" => self.expected_output.as_ref().is_some_and(|value| !value.is_null()),
"acceptance_criteria" => {
self.acceptance_criteria.as_ref().is_some_and(|values| !values.is_empty())
}
"risks" => self.risks.as_ref().is_some_and(|values| !values.is_empty()),
"fallback_triggers" => {
self.fallback_triggers
.as_ref()
.is_some_and(|values| !values.is_empty())
}
"conf" => self.conf.is_some(),
"assumptions_made" => self
.assumptions_made
.as_ref()
.is_some_and(|values| !values.is_empty()),
"open_questions" => self
.open_questions
.as_ref()
.is_some_and(|values| !values.is_empty()),
"failed_checks" => self
.failed_checks
.as_ref()
.is_some_and(|values| !values.is_empty()),
// Compatibility aliases for earlier envelope versions.
"id" => self.task_id.as_ref().is_some_and(StringCell::is_non_empty),
"src" => self.from_model.as_ref().is_some_and(StringCell::is_non_empty),
"dst" => self.to_model.as_ref().is_some_and(StringCell::is_non_empty),
"obj" => self
.objective
.as_ref()
.is_some_and(|value| !value.trim().is_empty()),
"refs" => {
self.artifact_refs.as_ref().is_some_and(|values| !values.is_empty())
|| !self.context.refs.is_empty()
}
"out" => self.out.has_output(),
"criteria" => self.criteria.has_requirements(),
_ => true,
}
}
fn has_required_contract(&self) -> bool {
self.acceptance_criteria
.as_ref()
.is_some_and(|values| !values.is_empty())
&& self
.assumptions_made
.as_ref()
.is_some_and(|values| !values.is_empty())
&& self.open_questions.as_ref().is_some_and(|values| !values.is_empty())
&& self.failed_checks.as_ref().is_some_and(|values| !values.is_empty())
}
fn is_router_scope_hint(&self) -> bool {
let scope = self.scope.as_deref().unwrap_or("");
let objective = self.objective.as_deref().unwrap_or("");
is_router_scope(scope) || is_router_scope(objective)
}
fn is_info_scope_hint(&self) -> bool {
let scope = self.scope.as_deref().unwrap_or("");
let objective = self.objective.as_deref().unwrap_or("");
is_info_scope(scope) || is_info_scope(objective)
}
fn is_builder_scope_hint(&self) -> bool {
let scope = self.scope.as_deref().unwrap_or("");
let objective = self.objective.as_deref().unwrap_or("");
is_builder_scope(scope) || is_builder_scope(objective)
}
fn is_security_or_reliability_sensitive(&self) -> bool {
let haystack = format!(
"{} {} {} {}",
self.scope.as_deref().unwrap_or_default(),
self.objective.as_deref().unwrap_or_default(),
self.risks
.as_ref()
.map(|items| items.join(" "))
.unwrap_or_default(),
self.fallback_triggers
.as_ref()
.map(|items| items.join(" "))
.unwrap_or_default()
);
is_security_signal(&haystack) || self
.artifact_refs
.as_ref()
.is_some_and(|refs| refs.iter().any(|value| {
let normalized = value.to_ascii_lowercase();
normalized.contains("security") || normalized.contains("privacy")
}))
}
fn is_contradictory_or_ambiguous(&self) -> bool {
let haystack = format!(
"{} {} {}",
self.scope.as_deref().unwrap_or_default(),
self.objective.as_deref().unwrap_or_default(),
self.fallback_triggers
.as_ref()
.map(|items| items.join(" "))
.unwrap_or_default(),
);
let is_contradictory = self
.failed_checks
.as_ref()
.is_some_and(|checks| {
checks
.iter()
.any(|value| is_contradictory_signal(value))
});
let is_ambiguous = is_ambiguous_signal(&haystack);
is_contradictory || is_ambiguous
}
}
impl ReviewEnvelope {
    /// Decodes a review envelope from TOON text, failing when the payload
    /// cannot be decoded.
    pub fn from_toon(content: &str) -> Result<Self> {
        parse_toon::<Self>(content)
    }
}
impl HandoffOut {
    /// True when at least one output expectation is set. `diff_summary` only
    /// counts when it is explicitly `Some(true)`.
    fn has_output(&self) -> bool {
        let signals = [
            self.alternatives.is_some(),
            self.recommended,
            self.cite_sources,
            self.format.is_some(),
            self.diff_summary == Some(true),
            !self.files.is_empty(),
            !self.tests.is_empty(),
            self.risks.is_some(),
            self.open_questions.is_some(),
        ];
        signals.contains(&true)
    }
}
impl HandoffCriteria {
    /// True when there is at least one `must` or `fail` entry; `optional`
    /// entries do not count.
    fn has_requirements(&self) -> bool {
        let nothing_required = self.must.is_empty() && self.fail.is_empty();
        !nothing_required
    }
}
/// A parsed `pattern => model` decision rule (see `parse_rule`).
#[derive(Debug, Clone)]
struct Rule {
/// Lower-cased, trimmed pattern from the left of `=>`.
left: String,
/// Trimmed model name from the right of `=>`.
model: String,
}
impl Rule {
    /// Reports whether this rule's pattern occurs in the handoff.
    ///
    /// The scope is compared against the part of the pattern before any `->`;
    /// the objective, constraints, artifact references and context references
    /// are compared against the whole pattern.
    fn matches(&self, handoff: &HandoffEnvelope) -> bool {
        let pattern = self.left.as_str();
        // `split` always yields at least one piece, so the fallback is never used.
        let scope_pattern = pattern.split("->").next().unwrap_or(pattern);

        let scope = handoff.scope.as_deref().unwrap_or_default();
        let objective = handoff.objective.as_deref().unwrap_or_default();
        if contains_token_or_phrase(scope, scope_pattern)
            || contains_token_or_phrase(objective, pattern)
        {
            return true;
        }

        // Flattening the `Option`s visits each list once. The earlier helper
        // fell back to `context.refs`, which was then scanned a second time.
        handoff
            .constraints
            .iter()
            .flatten()
            .chain(handoff.artifact_refs.iter().flatten())
            .chain(handoff.context.refs.iter())
            .any(|value| contains_token_or_phrase(value, pattern))
    }
}
/// Parses a `pattern => model` rule. Returns `None` when the separator is
/// missing or either side is blank. The pattern is lower-cased; the model
/// name is only trimmed.
fn parse_rule(raw: &str) -> Option<Rule> {
    let (pattern, target) = raw.split_once("=>")?;
    let left = pattern.trim().to_ascii_lowercase();
    let model = target.trim().to_string();
    let usable = !left.is_empty() && !model.is_empty();
    usable.then_some(Rule { left, model })
}
/// True when `scope` mentions a keyword that belongs to the router role.
fn is_router_scope(scope: &str) -> bool {
    const ROUTER_KEYWORDS: &[&str] = &[
        "architectural",
        "architectural_decision",
        "triage",
        "tradeoff",
        "ambiguous",
        "ambiguity",
        "security",
        "privacy",
        "migration",
        "incident",
        "reliability",
    ];
    is_scope_like(scope, ROUTER_KEYWORDS)
}
/// True when `scope` mentions a keyword that belongs to the info role.
fn is_info_scope(scope: &str) -> bool {
    const INFO_KEYWORDS: &[&str] = &[
        "research",
        "information",
        "information_gathering",
        "docs",
        "api",
        "contract",
        "comparison",
        "search",
        "options",
        "lookup",
        "investigate",
    ];
    is_scope_like(scope, INFO_KEYWORDS)
}
/// True when `scope` mentions a keyword that belongs to the builder role.
fn is_builder_scope(scope: &str) -> bool {
    const BUILDER_KEYWORDS: &[&str] = &[
        "code",
        "code_change",
        "implementation",
        "refactor",
        "tests",
        "test",
        "debug",
        "debugging",
        "execution",
        "implement",
        "build",
    ];
    is_scope_like(scope, BUILDER_KEYWORDS)
}
/// True when `scope` matches at least one of `tokens`.
fn is_scope_like(scope: &str, tokens: &[&str]) -> bool {
    for keyword in tokens {
        if contains_token_or_phrase(scope, keyword) {
            return true;
        }
    }
    false
}
/// True when `value` mentions a security or reliability keyword.
fn is_security_signal(value: &str) -> bool {
    const SECURITY_KEYWORDS: &[&str] = &[
        "security",
        "privacy",
        "migration",
        "incident",
        "data loss",
        "data-loss",
        "reliability",
        "production",
    ];
    is_token_or_phrase_in_set(value, SECURITY_KEYWORDS)
}
/// True when `value` mentions an ambiguity keyword.
fn is_ambiguous_signal(value: &str) -> bool {
    const AMBIGUITY_KEYWORDS: &[&str] = &["ambiguous", "ambiguity", "underdetermined", "unclear"];
    is_token_or_phrase_in_set(value, AMBIGUITY_KEYWORDS)
}
/// True when `value` mentions a contradiction keyword.
fn is_contradictory_signal(value: &str) -> bool {
    const CONTRADICTION_KEYWORDS: &[&str] =
        &["contradictory", "conflict", "inconsistent", "mismatch"];
    is_token_or_phrase_in_set(value, CONTRADICTION_KEYWORDS)
}
/// Lower-cases `value` (ASCII only) and reports whether it matches any of `tokens`.
fn is_token_or_phrase_in_set(value: &str, tokens: &[&str]) -> bool {
    let lowered = value.to_ascii_lowercase();
    for keyword in tokens {
        if contains_token_or_phrase(&lowered, keyword) {
            return true;
        }
    }
    false
}
/// True when `model`, ignoring surrounding whitespace, equals one of the
/// four model names configured in the policy.
fn is_allowed_model(policy: &RoutingPolicy, model: &str) -> bool {
    let candidate = model.trim();
    let roles = &policy.policy;
    [&roles.router, &roles.info, &roles.build, &roles.escalate_to]
        .iter()
        .any(|configured| configured.as_str() == candidate)
}
/// Reports whether `expected` occurs anywhere in `target`, ignoring ASCII case
/// in `target`.
///
/// `expected` is compared as given, so callers must pass it lower-cased. An
/// empty `expected` never matches.
///
/// NOTE: this is a substring match, not a word-boundary match: `"test"`
/// matches `"latest"`. The earlier token-by-token comparison was dropped
/// because any token equal to `expected` is also a substring of the lowered
/// target, so it could never change the result.
fn contains_token_or_phrase(target: &str, expected: &str) -> bool {
    !expected.is_empty() && target.to_ascii_lowercase().contains(expected)
}
/// Decodes a TOON document into `T`, attaching a fixed context message to
/// any decoder error.
fn parse_toon<T: DeserializeOwned>(content: &str) -> Result<T> {
    decode_default(content).with_context(|| "failed to decode TOON payload")
}
#[cfg(test)]
mod tests {
    use super::*;

    // Policy document with all three sections. `require_fields` lists 16
    // names, so the declared TOON array length must be 16; the earlier `[15]`
    // is rejected by decoders that check declared lengths.
    const POLICY_TP: &str = r##"
policy:
  router: "gpt-5.4"
  info: "gpt-5.4-mini"
  build: "gpt-5.3-codex-spark"
  escalate_to: "gpt-5.4"
  conf_threshold: 0.85
routing:
  decision_rules[4]: "architectural_decision => gpt-5.4", "debugging_or_testing => gpt-5.3-codex-spark", "docs_api_lookup_or_comparison => gpt-5.4-mini", "security_privacy_migration_incident => gpt-5.4"
gate:
  require_fields[16]: "task_id", "from_model", "to_model", "objective", "scope", "constraints", "current_state", "artifact_refs", "expected_output", "acceptance_criteria", "risks", "fallback_triggers", "conf", "assumptions_made", "open_questions", "failed_checks"
  fail_if_conf_less: 0.85
  if_conflict_or_missing: "reroute_to_router"
  if_test_failure: "reroute_to_builder_once_then_router"
"##;

    // Information-gathering handoff addressed to the info model.
    const RESEARCH_ENVELOPE: &str = r##"
task_id:
  value: "task-102"
from_model: "gpt-5.4"
to_model: "gpt-5.4-mini"
objective: "Find current API contract changes needed for feature X."
scope: "information_gathering"
constraints[3]: "no code edits", "cite source path", "stack: rust"
current_state: "prior_work: baseline-impl-v2"
artifact_refs[2]: "docs/api.md#120-210", "issues/412"
expected_output:
  alternatives: ["A", "B"]
  recommended_choice: "A"
  citations: ["docs/api.md#120-210"]
acceptance_criteria[3]: "produce >=2 options", "explain tradeoffs", "state confidence"
risks[1]: "none"
fallback_triggers[1]: "none"
conf: 0.88
assumptions_made[1]: "contracts are stable"
open_questions[1]: "none"
failed_checks[1]: "none"
"##;

    // Implementation handoff addressed to the builder model.
    const IMPLEMENTER_ENVELOPE: &str = r##"
task_id:
  value: "task-102"
from_model: "gpt-5.4"
to_model: "gpt-5.3-codex-spark"
objective: "Implement the chosen API adjustment with tests."
scope: "code_change"
constraints[4]: "edit only listed files", "maintain existing behavior", "no_semver_break", "no code edits"
current_state: "decision_outcome: option_a_selected"
artifact_refs[3]: "research/task-102.toon", "src/current_api.ts", "tests/api.test.ts"
expected_output:
  code_diff_summary: "..."
  test_plan: ["/ path"]
  risk_notes: ["none"]
  self_check: "ok"
acceptance_criteria[3]: "tests added/updated", "no breaking change", "backwards behavior unchanged"
risks[1]: "none"
fallback_triggers[1]: "implementation validation required"
conf: 0.9
assumptions_made[1]: "prior research is current"
open_questions[1]: "none"
failed_checks[1]: "none"
"##;

    // Review message sent from the builder back to the router.
    const REVIEW_ENVELOPE: &str = r##"
from:
  value: "gpt-5.3-codex-spark"
to:
  value: "gpt-5.4"
review:
  task: "task-102"
  conf: 0.82
  findings[3]: "assumptions", "risks", "validation_status"
  required[3]: "evidence", "counterexamples", "alternative_if_incomplete"
  gate: "approve | revise | escalate"
"##;

    /// An information-gathering handoff is routed to the info model.
    #[test]
    fn parses_policy_toon_and_routes_by_scope() {
        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
        let handoff = HandoffEnvelope::from_toon(RESEARCH_ENVELOPE).expect("parse research");
        assert_eq!(
            policy.parse_route(&handoff).expect("route").model,
            "gpt-5.4-mini"
        );
    }

    /// A complete envelope passes gate validation.
    #[test]
    fn validates_required_envelope_fields() {
        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
        let handoff =
            HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer");
        assert!(handoff.validate_required_fields(&policy).is_ok());
    }

    /// Confidence below the threshold escalates to the escalation model.
    #[test]
    fn escalates_when_confidence_below_threshold() {
        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
        let mut handoff =
            HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer");
        handoff.conf = Some(0.6);
        let routed = policy.parse_route(&handoff).expect("route");
        assert_eq!(routed.model, "gpt-5.4");
    }

    /// Security-related risk entries escalate regardless of destination.
    #[test]
    fn escalates_on_security_signal() {
        let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy");
        let mut handoff =
            HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer");
        handoff.risks = Some(vec!["privacy".to_string(), "migration".to_string()]);
        let routed = policy.parse_route(&handoff).expect("route");
        assert_eq!(routed.model, "gpt-5.4");
    }

    /// Review envelopes decode both the cell form and the nested payload.
    #[test]
    fn parses_review_envelope_toon() {
        let review = ReviewEnvelope::from_toon(REVIEW_ENVELOPE).expect("parse review");
        assert_eq!(review.from.as_str(), "gpt-5.3-codex-spark");
        assert_eq!(review.review.task.as_deref(), Some("task-102"));
    }
}