From 4a5c1d3c4bbd8150e91b8245bde6516ad4b4723d Mon Sep 17 00:00:00 2001 From: eric Date: Sat, 4 Apr 2026 19:29:44 +0200 Subject: [PATCH] feat: different modules per task --- .agent/controller-loop/policy.toon | 13 + src/controller/engine.rs | 280 ++++++++++- src/controller/executor.rs | 3 +- src/controller/planner.rs | 3 +- src/model/mod.rs | 7 + src/model/policy.rs | 781 +++++++++++++++++++++++++++++ 6 files changed, 1080 insertions(+), 7 deletions(-) create mode 100644 .agent/controller-loop/policy.toon create mode 100644 src/model/policy.rs diff --git a/.agent/controller-loop/policy.toon b/.agent/controller-loop/policy.toon new file mode 100644 index 0000000..9276c99 --- /dev/null +++ b/.agent/controller-loop/policy.toon @@ -0,0 +1,13 @@ +policy: + router: "gpt-5.4" + info: "gpt-5.4-mini" + build: "gpt-5.3-codex-spark" + escalate_to: "gpt-5.4" + conf_threshold: 0.85 +routing: + decision_rules[6]: "architectural_decision => gpt-5.4", "bug_triage => gpt-5.4", "tradeoff_decision => gpt-5.4", "ambiguous_task => gpt-5.4", "docs_api_lookup_or_comparison => gpt-5.4-mini", "debugging_or_testing => gpt-5.3-codex-spark" +gate: + require_fields[16]: "task_id", "from_model", "to_model", "objective", "scope", "constraints", "current_state", "artifact_refs", "expected_output", "acceptance_criteria", "risks", "fallback_triggers", "conf", "assumptions_made", "open_questions", "failed_checks" + fail_if_conf_less: 0.85 + if_conflict_or_missing: "reroute_to_router" + if_test_failure: "reroute_to_builder_once_then_router" diff --git a/src/controller/engine.rs b/src/controller/engine.rs index f433c50..d9b527f 100644 --- a/src/controller/engine.rs +++ b/src/controller/engine.rs @@ -1,18 +1,22 @@ -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::sync::mpsc::{Receiver, Sender, TryRecvError}; use anyhow::Result; +use serde_json::json; use crate::app::{AppEvent, ControlCommand}; use crate::controller::{executor, goal_checker, planner, verifier}; use crate::model::{ - 
ControllerPhase, GoalStatus, PlannerResponse, SessionEntry, SessionSource, SessionStream, - StepStatus, TaskConfig, + ControllerPhase, GoalStatus, HandoffContext, HandoffCriteria, HandoffEnvelope, HandoffOut, + PlannerResponse, RoutingPolicy, SessionEntry, SessionSource, SessionStream, StepStatus, StringCell, + TaskConfig, DEFAULT_RUN_MODEL, }; use crate::prompt; use crate::repo; use crate::storage::toon; +const DEFAULT_CONTROLLER_POLICY_PATH: &str = ".agent/controller-loop/policy.toon"; + pub fn runtime_loop( repo_root: PathBuf, config: TaskConfig, @@ -20,6 +24,7 @@ pub fn runtime_loop( event_tx: Sender, ) -> Result<()> { toon::ensure_controller_files(&config)?; + let routing_policy = load_controller_policy(); let _ = event_tx.send(AppEvent::Session(SessionEntry { source: SessionSource::Controller, stream: SessionStream::Status, @@ -160,8 +165,16 @@ pub fn runtime_loop( body: "Refining plan".to_string(), run_id: repo::next_run_id(), })); + let planner_model = pick_model_for_planner(&routing_policy, &state, &plan, &config); plan = - planner::refine_without_user_input(&repo_root, &config, &plan, &state, &event_tx)?; + planner::refine_without_user_input( + &repo_root, + &config, + &plan, + &state, + &planner_model, + &event_tx, + )?; state.replan_required = false; toon::write_plan(&config.plan_file, &plan)?; toon::write_state(&config.state_file, &state)?; @@ -213,7 +226,16 @@ pub fn runtime_loop( toon::write_state(&config.state_file, &state)?; emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state); - let exec = executor::implement(&repo_root, &config, &state, &plan, &step, &event_tx)?; + let executor_model = pick_model_for_step(&routing_policy, &state, &step, &config); + let exec = executor::implement( + &repo_root, + &config, + &state, + &plan, + &step, + &executor_model, + &event_tx, + )?; if goal_checker::needs_goal_clarification(&exec) { state.phase = ControllerPhase::Planning; state.set_stop_reason(format!( @@ -323,6 +345,254 @@ fn emit_snapshot( }); } 
+/// Chooses the model used for a plan-refinement run.
+///
+/// A run model that differs from `DEFAULT_RUN_MODEL` is returned unchanged and
+/// the routing policy is not consulted. Otherwise a planning handoff envelope
+/// is built and routed through `select_model_with_policy`.
+fn pick_model_for_planner(
+    policy: &Option<RoutingPolicy>,
+    state: &crate::model::ControllerState,
+    plan: &crate::model::Plan,
+    config: &TaskConfig,
+) -> String {
+    let fallback_model = state.run_model();
+    if fallback_model != DEFAULT_RUN_MODEL {
+        return fallback_model.to_string();
+    }
+
+    let handoff = build_planning_handoff(config, state, plan, fallback_model);
+    select_model_with_policy(policy.as_ref(), &handoff, fallback_model)
+}
+
+/// Chooses the model used to execute a single plan step.
+///
+/// Mirrors `pick_model_for_planner`: a non-default run model is returned
+/// unchanged, otherwise an execution handoff envelope for `step` is routed
+/// through `select_model_with_policy`.
+fn pick_model_for_step(
+    policy: &Option<RoutingPolicy>,
+    state: &crate::model::ControllerState,
+    step: &crate::model::PlanStep,
+    config: &TaskConfig,
+) -> String {
+    let fallback_model = state.run_model();
+    if fallback_model != DEFAULT_RUN_MODEL {
+        return fallback_model.to_string();
+    }
+
+    let handoff = build_execution_handoff(config, state, step, fallback_model);
+    select_model_with_policy(policy.as_ref(), &handoff, fallback_model)
+}
+
+/// Resolves a handoff envelope to a model name.
+///
+/// Returns `fallback_model` when no policy is loaded. When the policy rejects
+/// the envelope (`parse_route` returns `Err`), the policy's `escalate_to`
+/// model is used instead of propagating the error.
+fn select_model_with_policy(
+    policy: Option<&RoutingPolicy>,
+    handoff: &HandoffEnvelope,
+    fallback_model: &str,
+) -> String {
+    let Some(policy) = policy else {
+        return fallback_model.to_string();
+    };
+
+    policy
+        .parse_route(handoff)
+        .map(|decision| decision.model)
+        .unwrap_or_else(|_| policy.policy.escalate_to.to_string())
+}
+
+/// Loads the routing policy from `DEFAULT_CONTROLLER_POLICY_PATH`.
+///
+/// Returns `None` when the file is missing, cannot be read, or fails to
+/// decode; the failure reason is discarded.
+// NOTE(review): a malformed policy file is indistinguishable from "no policy"
+// here — consider surfacing the decode error to the session log.
+fn load_controller_policy() -> Option<RoutingPolicy> {
+    let policy_path = repo::absolute(Path::new(DEFAULT_CONTROLLER_POLICY_PATH));
+    if !policy_path.exists() {
+        return None;
+    }
+    let policy = std::fs::read_to_string(policy_path).ok()?;
+    RoutingPolicy::from_toon(&policy).ok()
+}
+
+/// Builds the handoff envelope describing a plan-refinement request.
+///
+/// `model` is recorded as `from_model`. Every field listed in the policy
+/// gate's `require_fields` is populated with a non-empty value so the envelope
+/// passes `HandoffEnvelope::validate_required_fields`.
+// NOTE(review): `to_model` is hard-coded to "gpt-5.4" rather than taken from
+// the loaded policy — confirm this is intended.
+fn build_planning_handoff(
+    config: &TaskConfig,
+    state: &crate::model::ControllerState,
+    plan: &crate::model::Plan,
+    model: &str,
+) -> HandoffEnvelope {
+    HandoffEnvelope {
+        task_id: Some(StringCell::Value(format!("{}-planner", config.controller_id()))),
+        from_model: Some(StringCell::Value(model.to_string())),
+        to_model: Some(StringCell::Value("gpt-5.4".to_string())),
+        objective: Some("Refine execution plan using current controller context.".to_string()),
+        scope: Some("architectural_decision".to_string()),
+        constraints: Some(vec![
+            "plan-generation".to_string(),
+            "minimal-delta".to_string(),
+            "no-questions".to_string(),
+        ]),
+        current_state: Some(format!(
+            "controller={} iteration={} phase={:?} replan_required={}",
+            config.controller_id(),
+            state.iteration,
+            state.phase,
+            state.replan_required
+        )),
+        context: HandoffContext {
+            refs: vec![
+                config.plan_file.to_string_lossy().to_string(),
+                format!("total_steps:{}", plan.steps.len()),
+            ],
+            state: None,
+        },
+        artifact_refs: Some(vec![
+            config.plan_file.to_string_lossy().to_string(),
+            config.goal_file.to_string_lossy().to_string(),
+            format!("controller={}", config.controller_id()),
+        ]),
+        expected_output: Some(json!({
+            "single_plan_delta": true,
+            "notes": [
+                "minimal delta",
+                "stable ids",
+                "no unchanged pending steps",
+            ]
+        })),
+        acceptance_criteria: Some(vec![
+            "minimal-plan-delta".to_string(),
+            "stable-step-ids".to_string(),
+            "no-unchanged-pending-step-duplication".to_string(),
+        ]),
+        risks: Some(vec![
+            "router-policy-governance".to_string(),
+        ]),
+        fallback_triggers: Some(vec![
+            "low-confidence".to_string(),
+            "scope-mismatch".to_string(),
+        ]),
+        confidence_threshold: Some(0.85),
+        conf: Some(1.0),
+        assumptions_made: Some(vec![
+            "existing plan is mostly valid".to_string(),
+            "planner output can remain in-place".to_string(),
+        ]),
+        open_questions: Some(vec!["none".to_string()]),
+        // The gate treats an empty list as a missing field, so "no failed
+        // checks" is spelled with the same "none" sentinel as open_questions.
+        failed_checks: Some(vec!["none".to_string()]),
+        out: HandoffOut {
+            alternatives: Some(1),
+            recommended: true,
+            cite_sources: false,
+            format: Some("json".to_string()),
+            diff_summary: Some(false),
+            files: Vec::new(),
+            tests: Vec::new(),
+            risks: Some("none".to_string()),
+            open_questions: Some("none".to_string()),
+        },
+        criteria: HandoffCriteria {
+            must: vec!["single_plan_delta".to_string()],
+            fail: Vec::new(),
+            optional: Vec::new(),
+        },
+    }
+}
+
+/// Builds the handoff envelope describing the execution of `step`.
+///
+/// `model` is recorded as `from_model`; the scope comes from
+/// `infer_step_scope`, and each step dependency is appended to the constraint
+/// list as `dependency:<id>`. A blank step title is replaced by
+/// `"execute step"` so the objective is never empty.
+// NOTE(review): `to_model` is hard-coded to "gpt-5.4". `parse_route` honors an
+// allowed `to_model` whenever the scope is recognised, so with the shipped
+// policy the builder model may never be selected — confirm intent.
+fn build_execution_handoff(
+    config: &TaskConfig,
+    state: &crate::model::ControllerState,
+    step: &crate::model::PlanStep,
+    model: &str,
+) -> HandoffEnvelope {
+    let mut constraints = vec![
+        "execution".to_string(),
+        "minimal-change".to_string(),
+        format!("step-status:{:?}", step.status),
+    ];
+    constraints.extend(
+        step.dependencies
+            .iter()
+            .map(|dependency| format!("dependency:{dependency}")),
+    );
+
+    let title = if step.title.trim().is_empty() {
+        "execute step".to_string()
+    } else {
+        step.title.clone()
+    };
+
+    HandoffEnvelope {
+        task_id: Some(StringCell::Value(format!("{}::{}", config.controller_id(), step.id))),
+        from_model: Some(StringCell::Value(model.to_string())),
+        to_model: Some(StringCell::Value("gpt-5.4".to_string())),
+        objective: Some(title),
+        scope: Some(infer_step_scope(step).to_string()),
+        constraints: Some(constraints),
+        current_state: Some(format!("controller={} iteration={}", config.controller_id(), state.iteration)),
+        artifact_refs: Some(vec![
+            format!("step:{}", step.id),
+            config.goal_file.to_string_lossy().to_string(),
+            config.plan_file.to_string_lossy().to_string(),
+        ]),
+        expected_output: Some(json!({
+            "code_diff_summary": "requested changes should be implemented",
+            "test_plan": ["targeted unit tests", "targeted integration checks"],
+            "risk_notes": ["validate behavior and no regressions"],
+            "self_check": "status/diff sanity checks"
+        })),
+        acceptance_criteria: Some(vec![
+            "implemented changes pass".to_string(),
+            "targeted test commands executed".to_string(),
+            "behavior remains stable".to_string(),
+        ]),
+        risks: Some(vec![
+            if step.status == crate::model::StepStatus::Blocked {
+                "blocked-step-resume".to_string()
+            } else {
+                "implementation-risk".to_string()
+            },
+        ]),
+        fallback_triggers: Some(vec![
+            "failing_tests_or_verification".to_string(),
+            "contradictory-findings".to_string(),
+        ]),
+        confidence_threshold: Some(0.85),
+        conf: Some(1.0),
+        assumptions_made: Some(vec![
+            "existing behavior should not regress".to_string(),
+            "targeted files are sufficient".to_string(),
+        ]),
+        open_questions: Some(vec!["none".to_string()]),
+        // The gate treats an empty list as a missing field, so "no failed
+        // checks" is spelled with the same "none" sentinel as open_questions.
+        failed_checks: Some(vec!["none".to_string()]),
+        context: HandoffContext {
+            refs: vec![
+                format!("step:{}", step.id),
+                config.goal_file.to_string_lossy().to_string(),
+            ],
+            state: Some(format!("controller={} iteration={}", config.controller_id(), state.iteration)),
+        },
+        out: HandoffOut {
+            alternatives: None,
+            recommended: true,
+            cite_sources: false,
+            format: None,
+            diff_summary: Some(true),
+            files: step.inputs.clone(),
+            tests: Vec::new(),
+            risks: Some("none".to_string()),
+            open_questions: Some("none".to_string()),
+        },
+        criteria: HandoffCriteria {
+            must: vec!["implementation_complete".to_string()],
+            fail: vec!["no_regression_for_existing_paths".to_string()],
+            optional: Vec::new(),
+        },
+    }
+}
+
+/// Derives a routing scope label from a step's title and purpose.
+///
+/// Matching is a case-insensitive substring search over
+/// `"<title> <purpose>"`. The most specific label is tested first: a step
+/// mentioning both "api" and "contract" is a docs/API lookup; any single
+/// research keyword is general information gathering; test/lint/debug
+/// keywords select debugging; everything else is plain `"code"`.
+fn infer_step_scope(step: &crate::model::PlanStep) -> &'static str {
+    let haystack = step.title.to_lowercase() + " " + &step.purpose.to_lowercase();
+    // Must precede the generic research check, which also matches "api" and
+    // "contract" individually and would otherwise shadow this branch.
+    if haystack.contains("api") && haystack.contains("contract") {
+        "docs_api_lookup_or_comparison"
+    } else if haystack.contains("research")
+        || haystack.contains("investigate")
+        || haystack.contains("lookup")
+        || haystack.contains("api")
+        || haystack.contains("contract")
+        || haystack.contains("compare")
+    {
+        "information_gathering"
+    } else if haystack.contains("test") || haystack.contains("lint") || haystack.contains("debug") {
+        "debugging_or_testing"
+    } else {
+        "code"
+    }
+}
+
 fn resumable_step(
     plan: &crate::model::Plan,
     state: &crate::model::ControllerState,
diff --git a/src/controller/executor.rs b/src/controller/executor.rs
index 4c716e0..e867b8f 100644
--- a/src/controller/executor.rs
+++ b/src/controller/executor.rs
@@ -15,6 +15,7 @@ pub fn implement(
     state: &ControllerState,
     plan: &Plan,
     step: &PlanStep,
+    model: &str,
     event_tx: &Sender,
 ) -> Result {
     let goal_md = toon::read_markdown(&config.goal_file)?;
@@ -67,7 +68,7 @@ pub fn implement(
         repo_root,
         &prompt,
         &schema,
-        state.run_model(),
+        model,
         event_tx,
         SessionSource::Executor,
         Some(step.id.clone()),
diff --git 
a/src/controller/planner.rs b/src/controller/planner.rs index 728eeee..9ac8ab8 100644 --- a/src/controller/planner.rs +++ b/src/controller/planner.rs @@ -14,6 +14,7 @@ pub fn refine_without_user_input( config: &TaskConfig, plan: &Plan, state: &ControllerState, + model: &str, event_tx: &Sender, ) -> Result { let goal_md = toon::read_markdown(&config.goal_file)?; @@ -60,7 +61,7 @@ pub fn refine_without_user_input( repo_root, &prompt, &schema, - state.run_model(), + model, event_tx, SessionSource::Planner, Some(config.controller_id()), diff --git a/src/model/mod.rs b/src/model/mod.rs index cf6c7ed..ea70950 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -2,6 +2,7 @@ mod controller; mod plan; mod response; mod schema; +mod policy; mod session; mod usage; @@ -32,6 +33,12 @@ pub use self::schema::{ verification_check_schema, }; #[allow(unused_imports)] +pub use self::policy::{ + HandoffContext, HandoffCriteria, HandoffEnvelope, HandoffOut, ModelRolePolicy, + PolicyDecision, PolicyGate, ReviewEnvelope, ReviewPayload, RoutingPolicy, RoutingRules, + StringCell, +}; +#[allow(unused_imports)] pub use self::session::{ group_session_entries, SessionCursor, SessionEntry, SessionGroup, SessionSelection, SessionSource, SessionStream, diff --git a/src/model/policy.rs b/src/model/policy.rs new file mode 100644 index 0000000..5c4e149 --- /dev/null +++ b/src/model/policy.rs @@ -0,0 +1,781 @@ +use anyhow::{bail, Context, Result}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; +use serde_json::Value; +use toon_format::decode_default; + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", default)] +pub struct RoutingPolicy { + pub policy: ModelRolePolicy, + pub routing: RoutingRules, + pub gate: PolicyGate, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", default)] +pub struct ModelRolePolicy { + pub router: String, + #[serde(alias = "research")] + pub info: String, + #[serde(alias = 
"builder")] + pub build: String, + pub escalate_to: String, + pub conf_threshold: f64, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(default)] +pub struct RoutingRules { + pub decision_rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", default)] +pub struct PolicyGate { + pub require_fields: Vec, + pub fail_if_conf_less: f64, + pub if_conflict_or_missing: String, + pub if_test_failure: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PolicyDecision { + pub model: String, + pub reason: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", default)] +pub struct HandoffEnvelope { + #[serde(alias = "id")] + pub task_id: Option, + #[serde(alias = "from", alias = "src")] + pub from_model: Option, + #[serde(alias = "to", alias = "dst")] + pub to_model: Option, + #[serde(alias = "obj")] + pub objective: Option, + pub scope: Option, + pub constraints: Option>, + pub current_state: Option, + #[serde(alias = "refs")] + pub artifact_refs: Option>, + pub expected_output: Option, + pub acceptance_criteria: Option>, + pub risks: Option>, + pub fallback_triggers: Option>, + pub confidence_threshold: Option, + #[serde(alias = "confidence")] + pub conf: Option, + pub assumptions_made: Option>, + pub open_questions: Option>, + pub failed_checks: Option>, + + #[serde(default)] + pub context: HandoffContext, + #[serde(default)] + pub out: HandoffOut, + #[serde(default)] + pub criteria: HandoffCriteria, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum StringCell { + Value(String), + Cell { value: String }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "kebab-case", default)] +pub struct HandoffContext { + #[serde(default)] + pub state: Option, + #[serde(default)] + pub refs: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = 
"kebab-case", default)] +pub struct HandoffOut { + #[serde(default)] + pub alternatives: Option, + #[serde(default)] + pub recommended: bool, + #[serde(default)] + pub cite_sources: bool, + #[serde(default)] + pub format: Option, + #[serde(default)] + pub diff_summary: Option, + #[serde(default)] + pub files: Vec, + #[serde(default)] + pub tests: Vec, + #[serde(default)] + pub risks: Option, + #[serde(default)] + pub open_questions: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, Default)] +#[serde(rename_all = "kebab-case", default)] +pub struct HandoffCriteria { + #[serde(default)] + pub must: Vec, + #[serde(default)] + pub fail: Vec, + #[serde(default)] + pub optional: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", default)] +pub struct ReviewEnvelope { + pub from: StringCell, + pub to: StringCell, + pub review: ReviewPayload, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "kebab-case", default)] +pub struct ReviewPayload { + #[serde(default)] + pub task: Option, + #[serde(default)] + pub conf: Option, + #[serde(default)] + pub findings: Vec, + #[serde(default)] + pub required: Vec, + #[serde(default)] + pub gate: Option, +} + +impl Default for RoutingPolicy { + fn default() -> Self { + Self { + policy: ModelRolePolicy::default(), + routing: RoutingRules::default(), + gate: PolicyGate { + require_fields: vec![ + "task_id".to_string(), + "from_model".to_string(), + "to_model".to_string(), + "objective".to_string(), + "scope".to_string(), + "constraints".to_string(), + "current_state".to_string(), + "artifact_refs".to_string(), + "expected_output".to_string(), + "acceptance_criteria".to_string(), + "risks".to_string(), + "fallback_triggers".to_string(), + "conf".to_string(), + "assumptions_made".to_string(), + "open_questions".to_string(), + "failed_checks".to_string(), + ], + fail_if_conf_less: 0.85, + if_conflict_or_missing: "reroute_to_router".to_string(), + 
if_test_failure: "reroute_to_builder_once_then_router".to_string(), + }, + } + } +} + +impl Default for ModelRolePolicy { + fn default() -> Self { + Self { + router: "gpt-5.4".to_string(), + info: "gpt-5.4-mini".to_string(), + build: "gpt-5.3-codex-spark".to_string(), + escalate_to: "gpt-5.4".to_string(), + conf_threshold: 0.85, + } + } +} + +impl Default for StringCell { + fn default() -> Self { + Self::Value(String::new()) + } +} + +impl StringCell { + pub fn as_str(&self) -> &str { + match self { + Self::Value(value) => value, + Self::Cell { value } => value, + } + } + + pub fn is_non_empty(&self) -> bool { + !self.as_str().trim().is_empty() + } +} + +impl RoutingPolicy { + pub fn from_toon(content: &str) -> Result { + parse_toon(content) + } + + pub fn parse_route(&self, handoff: &HandoffEnvelope) -> Result { + handoff.validate_required_fields(self)?; + + let conf_threshold = handoff.confidence_threshold.unwrap_or(self.policy.conf_threshold); + let effective_fail_threshold = self.gate.fail_if_conf_less.max(conf_threshold).max(self.policy.conf_threshold); + let conf = handoff.conf.unwrap_or(1.0); + if conf < effective_fail_threshold { + return Ok(PolicyDecision { + model: self.policy.escalate_to.clone(), + reason: "confidence below configured threshold".to_string(), + }); + } + + if handoff.is_security_or_reliability_sensitive() || handoff.is_contradictory_or_ambiguous() { + return Ok(PolicyDecision { + model: self.policy.escalate_to.clone(), + reason: "risk/contradiction or ambiguous classification requires router".to_string(), + }); + } + + if let Some(to_model) = handoff.to_model.as_ref().map(StringCell::as_str) { + let scope = handoff.scope.as_deref().unwrap_or_default(); + let objective = handoff.objective.as_deref().unwrap_or_default(); + if is_allowed_model(self, to_model) + && (is_builder_scope(scope) || is_info_scope(scope) || is_router_scope(scope)) + { + return Ok(PolicyDecision { + model: to_model.to_string(), + reason: "handoff destination 
honored".to_string(), + }); + } + if is_allowed_model(self, to_model) + && (is_builder_scope(objective) || is_info_scope(objective) || is_router_scope(objective)) + { + return Ok(PolicyDecision { + model: to_model.to_string(), + reason: "handoff destination honored".to_string(), + }); + } + } + + let scope = handoff.scope.as_deref().unwrap_or_default(); + let objective = handoff.objective.as_deref().unwrap_or_default(); + + let model = if is_router_scope(scope) || is_router_scope(objective) { + self.policy.router.clone() + } else if is_info_scope(scope) || is_info_scope(objective) { + self.policy.info.clone() + } else if is_builder_scope(scope) || is_builder_scope(objective) { + self.policy.build.clone() + } else { + self.routing + .decision_rules + .iter() + .find_map(|raw_rule| { + parse_rule(raw_rule).and_then(|rule| { + rule.matches(handoff).then_some(rule.model.clone()) + }) + }) + .unwrap_or_else(|| self.policy.router.clone()) + }; + + let reason = if model == self.policy.info { + "matched information-gathering scope".to_string() + } else if model == self.policy.build { + "matched implementation/debugging scope".to_string() + } else if model == self.policy.router { + "matched architectural/decision scope".to_string() + } else { + format!( + "matched decision rule {}", + handoff.scope.as_deref().unwrap_or("unknown") + ) + }; + + Ok(PolicyDecision { model, reason }) + } +} + +impl HandoffEnvelope { + pub fn from_toon(content: &str) -> Result { + parse_toon(content) + } + + pub fn validate_required_fields(&self, policy: &RoutingPolicy) -> Result<()> { + let mut missing = Vec::new(); + + for field in &policy.gate.require_fields { + if !self.has_required_field(field) { + missing.push(field.to_string()); + } + } + + if !missing.is_empty() { + missing.sort(); + missing.dedup(); + bail!("missing required handoff fields: {}", missing.join(", ")); + } + + if !self.has_required_contract() { + bail!( + "handoff must include acceptance_criteria and verification fields 
(assumptions_made/open_questions/failed_checks)" + ); + } + + if let Some(conf) = self.conf + && !(0.0..=1.0).contains(&conf) + { + bail!("confidence value must be normalized in 0.0..1.0, got {conf}"); + } + + Ok(()) + } + + fn has_required_field(&self, field: &str) -> bool { + match field { + "task_id" => self.task_id.as_ref().is_some_and(StringCell::is_non_empty), + "from_model" => self.from_model.as_ref().is_some_and(StringCell::is_non_empty), + "to_model" => self.to_model.as_ref().is_some_and(StringCell::is_non_empty), + "objective" => self + .objective + .as_ref() + .is_some_and(|value| !value.trim().is_empty()), + "scope" => self + .scope + .as_ref() + .is_some_and(|value| !value.trim().is_empty()), + "constraints" => self.constraints.as_ref().is_some_and(|values| !values.is_empty()), + "current_state" => self + .current_state + .as_ref() + .is_some_and(|value| !value.trim().is_empty()) + || self.context.state.as_ref().is_some_and(|value| !value.trim().is_empty()), + "artifact_refs" => self + .artifact_refs + .as_ref() + .is_some_and(|values| !values.is_empty()) + || !self.context.refs.is_empty(), + "expected_output" => self.expected_output.as_ref().is_some_and(|value| !value.is_null()), + "acceptance_criteria" => { + self.acceptance_criteria.as_ref().is_some_and(|values| !values.is_empty()) + } + "risks" => self.risks.as_ref().is_some_and(|values| !values.is_empty()), + "fallback_triggers" => { + self.fallback_triggers + .as_ref() + .is_some_and(|values| !values.is_empty()) + } + "conf" => self.conf.is_some(), + "assumptions_made" => self + .assumptions_made + .as_ref() + .is_some_and(|values| !values.is_empty()), + "open_questions" => self + .open_questions + .as_ref() + .is_some_and(|values| !values.is_empty()), + "failed_checks" => self + .failed_checks + .as_ref() + .is_some_and(|values| !values.is_empty()), + // Compatibility aliases for earlier envelope versions. 
+ "id" => self.task_id.as_ref().is_some_and(StringCell::is_non_empty), + "src" => self.from_model.as_ref().is_some_and(StringCell::is_non_empty), + "dst" => self.to_model.as_ref().is_some_and(StringCell::is_non_empty), + "obj" => self + .objective + .as_ref() + .is_some_and(|value| !value.trim().is_empty()), + "refs" => { + self.artifact_refs.as_ref().is_some_and(|values| !values.is_empty()) + || !self.context.refs.is_empty() + } + "out" => self.out.has_output(), + "criteria" => self.criteria.has_requirements(), + _ => true, + } + } + + fn has_required_contract(&self) -> bool { + self.acceptance_criteria + .as_ref() + .is_some_and(|values| !values.is_empty()) + && self + .assumptions_made + .as_ref() + .is_some_and(|values| !values.is_empty()) + && self.open_questions.as_ref().is_some_and(|values| !values.is_empty()) + && self.failed_checks.as_ref().is_some_and(|values| !values.is_empty()) + } + + fn is_router_scope_hint(&self) -> bool { + let scope = self.scope.as_deref().unwrap_or(""); + let objective = self.objective.as_deref().unwrap_or(""); + is_router_scope(scope) || is_router_scope(objective) + } + + fn is_info_scope_hint(&self) -> bool { + let scope = self.scope.as_deref().unwrap_or(""); + let objective = self.objective.as_deref().unwrap_or(""); + is_info_scope(scope) || is_info_scope(objective) + } + + fn is_builder_scope_hint(&self) -> bool { + let scope = self.scope.as_deref().unwrap_or(""); + let objective = self.objective.as_deref().unwrap_or(""); + is_builder_scope(scope) || is_builder_scope(objective) + } + + fn is_security_or_reliability_sensitive(&self) -> bool { + let haystack = format!( + "{} {} {} {}", + self.scope.as_deref().unwrap_or_default(), + self.objective.as_deref().unwrap_or_default(), + self.risks + .as_ref() + .map(|items| items.join(" ")) + .unwrap_or_default(), + self.fallback_triggers + .as_ref() + .map(|items| items.join(" ")) + .unwrap_or_default() + ); + is_security_signal(&haystack) || self + .artifact_refs + .as_ref() + 
.is_some_and(|refs| refs.iter().any(|value| { + let normalized = value.to_ascii_lowercase(); + normalized.contains("security") || normalized.contains("privacy") + })) + } + + fn is_contradictory_or_ambiguous(&self) -> bool { + let haystack = format!( + "{} {} {}", + self.scope.as_deref().unwrap_or_default(), + self.objective.as_deref().unwrap_or_default(), + self.fallback_triggers + .as_ref() + .map(|items| items.join(" ")) + .unwrap_or_default(), + ); + let is_contradictory = self + .failed_checks + .as_ref() + .is_some_and(|checks| { + checks + .iter() + .any(|value| is_contradictory_signal(value)) + }); + let is_ambiguous = is_ambiguous_signal(&haystack); + is_contradictory || is_ambiguous + } +} + +impl ReviewEnvelope { + pub fn from_toon(content: &str) -> Result { + parse_toon(content) + } +} + +impl HandoffOut { + fn has_output(&self) -> bool { + self.alternatives.is_some() + || self.recommended + || self.cite_sources + || self.format.is_some() + || self.diff_summary.unwrap_or(false) + || !self.files.is_empty() + || !self.tests.is_empty() + || self.risks.is_some() + || self.open_questions.is_some() + } +} + +impl HandoffCriteria { + fn has_requirements(&self) -> bool { + !self.must.is_empty() || !self.fail.is_empty() + } +} + +#[derive(Debug, Clone)] +struct Rule { + left: String, + model: String, +} + +impl Rule { + fn matches(&self, handoff: &HandoffEnvelope) -> bool { + if self + .left + .split("->") + .next() + .is_some_and(|left| contains_token_or_phrase(handoff.scope.as_deref().unwrap_or(""), left)) + { + return true; + } + + if contains_token_or_phrase( + handoff.objective.as_deref().unwrap_or(""), + &self.left, + ) { + return true; + } + + handoff + .constraints + .as_ref() + .into_iter() + .flat_map(|constraints| constraints.iter()) + .chain(self.artifact_refs_or_fallback(handoff).iter()) + .chain(handoff.context.refs.iter()) + .any(|value| contains_token_or_phrase(value, &self.left)) + } + + fn artifact_refs_or_fallback<'a>(&self, handoff: &'a 
HandoffEnvelope) -> &'a Vec { + handoff.artifact_refs.as_ref().unwrap_or(&handoff.context.refs) + } +} + +fn parse_rule(raw: &str) -> Option { + let mut split = raw.splitn(2, "=>"); + let left = split.next()?.trim().to_ascii_lowercase(); + let model = split.next()?.trim().to_string(); + + if left.is_empty() || model.is_empty() { + return None; + } + + Some(Rule { left, model }) +} + +fn is_router_scope(scope: &str) -> bool { + is_scope_like(scope, &[ + "architectural", + "architectural_decision", + "triage", + "tradeoff", + "ambiguous", + "ambiguity", + "security", + "privacy", + "migration", + "incident", + "reliability", + ]) +} + +fn is_info_scope(scope: &str) -> bool { + is_scope_like(scope, &[ + "research", + "information", + "information_gathering", + "docs", + "api", + "contract", + "comparison", + "search", + "options", + "lookup", + "investigate", + ]) +} + +fn is_builder_scope(scope: &str) -> bool { + is_scope_like(scope, &[ + "code", + "code_change", + "implementation", + "refactor", + "tests", + "test", + "debug", + "debugging", + "execution", + "implement", + "build", + ]) +} + +fn is_scope_like(scope: &str, tokens: &[&str]) -> bool { + tokens.iter().any(|token| contains_token_or_phrase(scope, token)) +} + +fn is_security_signal(value: &str) -> bool { + is_token_or_phrase_in_set( + value, + &[ + "security", + "privacy", + "migration", + "incident", + "data loss", + "data-loss", + "reliability", + "production", + ], + ) +} + +fn is_ambiguous_signal(value: &str) -> bool { + is_token_or_phrase_in_set(value, &["ambiguous", "ambiguity", "underdetermined", "unclear"]) +} + +fn is_contradictory_signal(value: &str) -> bool { + is_token_or_phrase_in_set(value, &["contradictory", "conflict", "inconsistent", "mismatch"]) +} + +fn is_token_or_phrase_in_set(value: &str, tokens: &[&str]) -> bool { + let normalized = value.to_ascii_lowercase(); + tokens.iter().any(|token| contains_token_or_phrase(&normalized, token)) +} + +fn is_allowed_model(policy: &RoutingPolicy, 
model: &str) -> bool { + let m = model.trim(); + m == policy.policy.router || m == policy.policy.info || m == policy.policy.build || m == policy.policy.escalate_to +} + +fn contains_token_or_phrase(target: &str, expected: &str) -> bool { + if expected.is_empty() { + return false; + } + let normalized = target.to_ascii_lowercase(); + normalized.split(|c: char| !c.is_ascii_alphanumeric() && c != '_') + .any(|token| !token.is_empty() && token == expected) + || normalized.contains(expected) +} + +fn parse_toon(content: &str) -> Result { + decode_default(content).context("failed to decode TOON payload") +} + +#[cfg(test)] +mod tests { + use super::*; + + const POLICY_TP: &str = r##" +policy: + router: "gpt-5.4" + info: "gpt-5.4-mini" + build: "gpt-5.3-codex-spark" + escalate_to: "gpt-5.4" + conf_threshold: 0.85 +routing: + decision_rules[4]: "architectural_decision => gpt-5.4", "debugging_or_testing => gpt-5.3-codex-spark", "docs_api_lookup_or_comparison => gpt-5.4-mini", "security_privacy_migration_incident => gpt-5.4" +gate: + require_fields[15]: "task_id", "from_model", "to_model", "objective", "scope", "constraints", "current_state", "artifact_refs", "expected_output", "acceptance_criteria", "risks", "fallback_triggers", "conf", "assumptions_made", "open_questions", "failed_checks" + fail_if_conf_less: 0.85 + if_conflict_or_missing: "reroute_to_router" + if_test_failure: "reroute_to_builder_once_then_router" +"##; + + const RESEARCH_ENVELOPE: &str = r##" +task_id: + value: "task-102" +from_model: "gpt-5.4" +to_model: "gpt-5.4-mini" +objective: "Find current API contract changes needed for feature X." 
+scope: "information_gathering" +constraints[3]: "no code edits", "cite source path", "stack: rust" +current_state: "prior_work: baseline-impl-v2" +artifact_refs[2]: "docs/api.md#120-210", "issues/412" +expected_output: + alternatives: ["A", "B"] + recommended_choice: "A" + citations: ["docs/api.md#120-210"] +acceptance_criteria[3]: "produce >=2 options", "explain tradeoffs", "state confidence" +risks[1]: "none" +fallback_triggers[1]: "none" +conf: 0.88 +assumptions_made[1]: "contracts are stable" +open_questions[1]: "none" +failed_checks[1]: "none" +"##; + + const IMPLEMENTER_ENVELOPE: &str = r##" +task_id: + value: "task-102" +from_model: "gpt-5.4" +to_model: "gpt-5.3-codex-spark" +objective: "Implement the chosen API adjustment with tests." +scope: "code_change" +constraints[4]: "edit only listed files", "maintain existing behavior", "no_semver_break", "no code edits" +current_state: "decision_outcome: option_a_selected" +artifact_refs[3]: "research/task-102.toon", "src/current_api.ts", "tests/api.test.ts" +expected_output: + code_diff_summary: "..." 
+ test_plan: ["/ path"] + risk_notes: ["none"] + self_check: "ok" +acceptance_criteria[3]: "tests added/updated", "no breaking change", "backwards behavior unchanged" +risks[1]: "none" +fallback_triggers[1]: "implementation validation required" +conf: 0.9 +assumptions_made[1]: "prior research is current" +open_questions[1]: "none" +failed_checks[1]: "none" +"##; + + const REVIEW_ENVELOPE: &str = r##" +from: + value: "gpt-5.3-codex-spark" +to: + value: "gpt-5.4" +review: + task: "task-102" + conf: 0.82 + findings[3]: "assumptions", "risks", "validation_status" + required[3]: "evidence", "counterexamples", "alternative_if_incomplete" + gate: "approve | revise | escalate" +"##; + + #[test] + fn parses_policy_toon_and_routes_by_scope() { + let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy"); + let handoff = HandoffEnvelope::from_toon(RESEARCH_ENVELOPE).expect("parse research"); + assert_eq!( + policy + .parse_route(&handoff) + .expect("route") + .model, + "gpt-5.4-mini" + ); + } + + #[test] + fn validates_required_envelope_fields() { + let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy"); + let handoff = HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer"); + assert!(handoff.validate_required_fields(&policy).is_ok()); + } + + #[test] + fn escalates_when_confidence_below_threshold() { + let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy"); + let mut handoff = HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer"); + handoff.conf = Some(0.6); + + let routed = policy.parse_route(&handoff).expect("route"); + assert_eq!(routed.model, "gpt-5.4"); + } + + #[test] + fn escalates_on_security_signal() { + let policy = RoutingPolicy::from_toon(POLICY_TP).expect("parse policy"); + let mut handoff = HandoffEnvelope::from_toon(IMPLEMENTER_ENVELOPE).expect("parse implementer"); + handoff.risks = Some(vec!["privacy".to_string(), "migration".to_string()]); + + let routed = 
policy.parse_route(&handoff).expect("route"); + assert_eq!(routed.model, "gpt-5.4"); + } + + #[test] + fn parses_review_envelope_toon() { + let review = ReviewEnvelope::from_toon(REVIEW_ENVELOPE).expect("parse review"); + assert_eq!(review.from.as_str(), "gpt-5.3-codex-spark"); + assert_eq!(review.review.task.as_deref(), Some("task-102")); + } +}