From 32147d45520495a117229a248177e88149a18f4a Mon Sep 17 00:00:00 2001 From: eric Date: Sat, 4 Apr 2026 13:12:17 +0200 Subject: [PATCH] feat: 3 person review --- .agent/controller-loop/task.toon | 10 +- .../goal.md | 3 - .../plan.toon | 3 - .../standards.md | 5 - .../state.toon | 21 - .agent/controllers/controller-loop/goal.md | 4 - .agent/controllers/controller-loop/plan.toon | 3 - .../controllers/controller-loop/standards.md | 8 - .agent/controllers/controller-loop/state.toon | 17 - .../controllers/keystone-seam-audit/goal.md | 20 - .../controllers/keystone-seam-audit/plan.toon | 94 - .../keystone-seam-audit/standards.md | 14 - .../keystone-seam-audit/state.toon | 25 - .agent/controllers/longview-planner/goal.md | 4 + .agent/controllers/longview-planner/plan.toon | 82 + .../controllers/longview-planner/standards.md | 10 + .../controllers/longview-planner/state.toon | 49 + .agent/controllers/module-mosaic/goal.md | 5 - .agent/controllers/module-mosaic/plan.toon | 130 -- .agent/controllers/module-mosaic/standards.md | 9 - .agent/controllers/module-mosaic/state.toon | 43 - .agent/controllers/runtime-open/goal.md | 3 - .agent/controllers/runtime-open/plan.toon | 3 - .agent/controllers/runtime-open/standards.md | 5 - .agent/controllers/runtime-open/state.toon | 22 - .../controllers/teamwise-prompt-lens/goal.md | 12 - .../teamwise-prompt-lens/plan.toon | 89 - .../teamwise-prompt-lens/standards.md | 13 - .../teamwise-prompt-lens/state.toon | 30 - src/app/mod.rs | 37 +- src/app/picker.rs | 29 +- src/app/runtime/events.rs | 3 + src/app/runtime/mod.rs | 20 +- src/app/runtime/usage.rs | 17 +- src/app/runtime/workspace.rs | 17 +- src/app/tests.rs | 21 + src/controller/engine.rs | 100 +- src/controller/executor.rs | 23 +- src/controller/planner.rs | 14 + src/model/controller.rs | 58 + src/model/mod.rs | 10 + src/model/response.rs | 139 ++ src/model/schema.rs | 179 ++ src/planning/forwarder.rs | 1560 ++++++++++++++++- src/planning/session.rs | 18 +- src/process/codex.rs | 3 + src/process/mod.rs | 2 +- src/process/usage.rs | 28 +- src/storage/toon/controllers.rs | 6 +- src/ui/mod.rs | 38 +- 50 files changed, 2398 insertions(+), 660 deletions(-) delete mode 100644 .agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/goal.md delete mode 100644 .agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/plan.toon delete mode 100644 .agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/standards.md delete mode 100644 .agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/state.toon delete mode 100644 .agent/controllers/controller-loop/goal.md delete mode 100644 .agent/controllers/controller-loop/plan.toon delete mode 100644 .agent/controllers/controller-loop/standards.md delete mode 100644 .agent/controllers/controller-loop/state.toon delete mode 100644 .agent/controllers/keystone-seam-audit/goal.md delete mode 100644 .agent/controllers/keystone-seam-audit/plan.toon delete mode 100644 .agent/controllers/keystone-seam-audit/standards.md delete mode 100644 .agent/controllers/keystone-seam-audit/state.toon create mode 100644 .agent/controllers/longview-planner/goal.md create mode 100644 .agent/controllers/longview-planner/plan.toon create mode 100644 .agent/controllers/longview-planner/standards.md create mode 100644 .agent/controllers/longview-planner/state.toon delete mode 100644 .agent/controllers/module-mosaic/goal.md delete mode 100644 .agent/controllers/module-mosaic/plan.toon delete mode 100644 .agent/controllers/module-mosaic/standards.md delete mode 100644 .agent/controllers/module-mosaic/state.toon delete mode 100644 .agent/controllers/runtime-open/goal.md delete mode 100644 .agent/controllers/runtime-open/plan.toon delete mode 100644 .agent/controllers/runtime-open/standards.md delete mode 100644 .agent/controllers/runtime-open/state.toon delete mode 100644 .agent/controllers/teamwise-prompt-lens/goal.md delete mode 100644 .agent/controllers/teamwise-prompt-lens/plan.toon delete mode 100644 .agent/controllers/teamwise-prompt-lens/standards.md delete mode 100644 .agent/controllers/teamwise-prompt-lens/state.toon diff --git a/.agent/controller-loop/task.toon b/.agent/controller-loop/task.toon index e9df0fe..2786e27 100644 --- a/.agent/controller-loop/task.toon +++ b/.agent/controller-loop/task.toon @@ -1,9 +1,9 @@ engine: "data-driven-v1" -goal_file: ".agent/controllers/teamwise-prompt-lens/goal.md" -plan_file: ".agent/controllers/teamwise-prompt-lens/plan.toon" -state_file: ".agent/controllers/teamwise-prompt-lens/state.toon" -standards_file: ".agent/controllers/teamwise-prompt-lens/standards.md" -branch: "codex/teamwise-prompt-lens" +goal_file: ".agent/controllers/longview-planner/goal.md" +plan_file: ".agent/controllers/longview-planner/plan.toon" +state_file: ".agent/controllers/longview-planner/state.toon" +standards_file: ".agent/controllers/longview-planner/standards.md" +branch: "codex/longview-planner" continue_until: "fixed-point" max_runs: 12 max_wall_clock: 4h \ No newline at end of file diff --git a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/goal.md b/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/goal.md deleted file mode 100644 index 1315b20..0000000 --- a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/goal.md +++ /dev/null @@ -1,3 +0,0 @@ -# Goal - -Describe the goal for this controller. diff --git a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/plan.toon b/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/plan.toon deleted file mode 100644 index c0524ef..0000000 --- a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/plan.toon +++ /dev/null @@ -1,3 +0,0 @@ -version: 1 -goal_summary: No plan yet -steps[0]: \ No newline at end of file diff --git a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/standards.md b/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/standards.md deleted file mode 100644 index e7b6088..0000000 --- a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/standards.md +++ /dev/null @@ -1,5 +0,0 @@ -# Standards - -- Keep code maintainable. -- Avoid one-off hacks. -- Leave tests green. diff --git a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/state.toon b/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/state.toon deleted file mode 100644 index fdb40a3..0000000 --- a/.agent/controllers/code-refactoring-act-as-as-a-senior-software-arc/state.toon +++ /dev/null @@ -1,21 +0,0 @@ -version: 1 -phase: planning -goal_status: unknown -goal_revision: 0 -current_step_id: null -iteration: 0 -replan_required: false -completed_steps[0]: -blocked_steps[0]: -last_verification: null -last_cleanup_summary: null -last_full_test_summary: null -history[0]: -notes[0]: -planning_session: - pending_question: null - transcript[0]: -started_at: "1775272586" -last_usage_refresh_at: "1775272706" -last_usage_input_tokens: null -last_usage_output_tokens: null \ No newline at end of file diff --git a/.agent/controllers/controller-loop/goal.md b/.agent/controllers/controller-loop/goal.md deleted file mode 100644 index 3fe078a..0000000 --- a/.agent/controllers/controller-loop/goal.md +++ /dev/null @@ -1,4 +0,0 @@ -# Goal - -Rewrite `codex-controller-loop` as a Rust TUI-first autonomous controller with TOON-backed machine state and a hard planning/execution phase boundary. - diff --git a/.agent/controllers/controller-loop/plan.toon b/.agent/controllers/controller-loop/plan.toon deleted file mode 100644 index 1ad859f..0000000 --- a/.agent/controllers/controller-loop/plan.toon +++ /dev/null @@ -1,3 +0,0 @@ -version: 1 -goal_summary: Rust TUI-first autonomous controller -steps[0]: diff --git a/.agent/controllers/controller-loop/standards.md b/.agent/controllers/controller-loop/standards.md deleted file mode 100644 index 9ecb78f..0000000 --- a/.agent/controllers/controller-loop/standards.md +++ /dev/null @@ -1,8 +0,0 @@ -# Standards - -- Keep the Rust code modular and readable. -- Treat planning as the only user-input phase. -- Treat execution as autonomous except for pause, resume, stop, and goal update. -- Keep controller-owned machine state in TOON files. -- Leave the codebase in a maintainable state after each completed step. - diff --git a/.agent/controllers/controller-loop/state.toon b/.agent/controllers/controller-loop/state.toon deleted file mode 100644 index 37ce501..0000000 --- a/.agent/controllers/controller-loop/state.toon +++ /dev/null @@ -1,17 +0,0 @@ -version: 1 -phase: planning -goal_status: unknown -goal_revision: 0 -current_step_id: null -iteration: 0 -replan_required: false -completed_steps[0]: -blocked_steps[0]: -last_verification: null -last_cleanup_summary: null -last_full_test_summary: null -history[0]: -notes[0]: -planning_session: - pending_question: null - transcript[0]: diff --git a/.agent/controllers/keystone-seam-audit/goal.md b/.agent/controllers/keystone-seam-audit/goal.md deleted file mode 100644 index 8bc5325..0000000 --- a/.agent/controllers/keystone-seam-audit/goal.md +++ /dev/null @@ -1,20 +0,0 @@ -# Goal - -Identify oversized, hand-maintained source files in the repository, prioritize the highest-value refactor targets, and split them into smaller, cohesive modules without changing external behavior. - -A file should be considered a refactor candidate when it is materially large or overloaded, using these default signals: -- More than 300 lines of hand-written code. -- Multiple unrelated responsibilities in one file. -- Difficult-to-test logic mixed with I/O, UI, routing, state wiring, or formatting. - -Execution requirements: -- Ignore generated, vendored, build, cache, and lock files unless the repository clearly treats them as hand-maintained source. -- Refactor incrementally, one target at a time, starting with the largest safe candidate. -- Preserve public APIs and user-visible behavior unless a compatibility adjustment is required to complete the split safely. -- Leave the repository in a clean, test-passing state. - -Expected outputs: -- Smaller files with clearer ownership boundaries. -- Any necessary import/export or module wiring updates. -- Tests updated or added when needed to preserve behavior. -- A concise summary of files split, new module boundaries, and verification results. \ No newline at end of file diff --git a/.agent/controllers/keystone-seam-audit/plan.toon b/.agent/controllers/keystone-seam-audit/plan.toon deleted file mode 100644 index 1b11d6d..0000000 --- a/.agent/controllers/keystone-seam-audit/plan.toon +++ /dev/null @@ -1,94 +0,0 @@ -version: 1 -goal_summary: "Audit the repository for oversized hand-maintained source files, prioritize safe high-value refactor targets, split them into smaller cohesive modules, and finish with passing validation and a clean diff." -steps[6]: - - id: s1 - title: Establish Safe Refactor Scope - purpose: "Load controller inputs and repository constraints, confirm the working tree state, and define the exact file-selection rules so execution can proceed without ambiguity." - inputs[4]: ".agent/controllers/keystone-seam-audit/goal.md",".agent/controllers/keystone-seam-audit/standards.md",repository working tree,"repo-level instructions such as AGENTS.md if present" - outputs[3]: confirmed execution constraints,candidate exclusion rules,list of locally modified files to avoid or handle carefully - dependencies[0]: - verification[2]: - - label: Check working tree state - commands[1]: "git status --short" - - label: Locate repo instructions - commands[1]: "rg --files -g 'AGENTS.md' -g '.agent/**'" - cleanup_requirements[1]{label,description}: - No accidental overlap with user edits,Do not modify files with unrelated local changes unless the change is required and the existing edits are understood and preserved. - status: active - attempts: 1 - - id: s2 - title: "Inventory Large Hand-Maintained Files" - purpose: "Produce a ranked inventory of oversized source files while excluding generated and third-party material." - inputs[2]: repository file list,selection thresholds from goal and standards - outputs[3]: ranked candidate list with line counts,"excluded-path list",initial top refactor targets - dependencies[1]: s1 - verification[2]: - - label: Enumerate tracked files - commands[1]: "rg --files" - - label: Rank large files by line count - commands[1]: "python - <<'PY'\nimport os, subprocess\nexclude = {'node_modules','dist','build','coverage','.git','.next','.svelte-kit','target','vendor'}\nfiles = subprocess.check_output(['rg','--files']).decode().splitlines()\nrows = []\nfor f in files:\n parts = set(f.split('/'))\n if parts & exclude:\n continue\n if os.path.splitext(f)[1] in {'.png','.jpg','.jpeg','.gif','.svg','.lock','.snap','.min.js','.map'}:\n continue\n try:\n with open(f,'r',encoding='utf-8') as fh:\n n = sum(1 for _ in fh)\n except Exception:\n continue\n if n > 300:\n rows.append((n,f))\nfor n,f in sorted(rows, reverse=True)[:50]:\n print(f'{n}\\t{f}')\nPY" - cleanup_requirements[1]{label,description}: - Discard false positives,"Remove generated files, migration dumps, fixtures, and machine-authored artifacts from the candidate list before choosing targets." - status: todo - attempts: 0 - - id: s3 - title: Choose Refactor Order And Boundaries - purpose: "Inspect the largest candidates, decide which files are safe and high-value to split first, and define intended module boundaries before editing." - inputs[3]: ranked candidate list,current file contents,existing module structure - outputs[3]: ordered target list,boundary notes for each target,"explicit non-goals for each refactor" - dependencies[1]: s2 - verification[2]: - - label: Inspect top candidates - commands[2]: "sed -n '1,220p' ","sed -n '221,440p' " - - label: Map exports and dependents - commands[1]: "rg -n \"from ['\\\"]|require\\(\" " - cleanup_requirements[1]{label,description}: - "Avoid cosmetic-only churn","Do not split files purely by line count; only proceed when coherent seams such as utilities, domain logic, adapters, routes, or components are identifiable." - status: todo - attempts: 0 - - id: s4 - title: Refactor First Target Incrementally - purpose: "Split the highest-priority candidate into smaller cohesive files while preserving behavior and keeping the change reviewable." - inputs[3]: first target file,boundary notes,repo conventions - outputs[3]: new smaller modules,updated imports/exports,"target-specific validation result" - dependencies[1]: s3 - verification[2]: - - label: Run targeted tests or checks - commands[1]: "" - - label: Confirm file size reduction - commands[1]: "wc -l " - cleanup_requirements[1]{label,description}: - Remove temporary seams,"Delete transitional helpers, dead exports, and unused imports created during the split before moving on." - status: todo - attempts: 0 - - id: s5 - title: "Repeat For Remaining High-Value Targets" - purpose: "Continue the same refactor pattern for additional oversized files until the main high-value targets are addressed or diminishing returns are reached." - inputs[2]: remaining ordered targets,lessons from first refactor - outputs[3]: additional split modules,updated dependency wiring,"per-target validation notes" - dependencies[1]: s4 - verification[2]: - - label: "Run per-target validation after each split" - commands[1]: "" - - label: Track remaining oversized files - commands[1]: "python - <<'PY'\nimport os, subprocess\nexclude = {'node_modules','dist','build','coverage','.git','.next','.svelte-kit','target','vendor'}\nfiles = subprocess.check_output(['rg','--files']).decode().splitlines()\nfor f in files:\n parts = set(f.split('/'))\n if parts & exclude:\n continue\n try:\n with open(f,'r',encoding='utf-8') as fh:\n n = sum(1 for _ in fh)\n except Exception:\n continue\n if n > 300:\n print(f'{n}\\t{f}')\nPY" - cleanup_requirements[1]{label,description}: - Stop at sensible boundary,"Do not keep splitting once modules are cohesive and maintainable; leave well-structured files intact even if they remain moderately large." - status: todo - attempts: 0 - - id: s6 - title: Run Full Validation And Final Cleanup - purpose: "Verify repository health, remove leftover refactor debris, and produce a concise execution summary for the controller result." - inputs[2]: all refactor changes,repository validation commands - outputs[3]: final passing validation results,cleaned diff,"summary of targets, seams, and tests" - dependencies[1]: s5 - verification[2]: - - label: Run broadest available validation - commands[3]: "","","" - - label: Check for leftover issues - commands[2]: "git diff --check","git status --short" - cleanup_requirements[2]{label,description}: - Leave clean refactor artifacts,"Remove unused files, stale exports, dead code, and temporary comments; ensure only intentional source changes remain." - Record outcome,"Summarize which files were split, the new module boundaries, any tests added or updated, and any remaining large files intentionally left unchanged." - status: todo - attempts: 0 \ No newline at end of file diff --git a/.agent/controllers/keystone-seam-audit/standards.md b/.agent/controllers/keystone-seam-audit/standards.md deleted file mode 100644 index 6c84948..0000000 --- a/.agent/controllers/keystone-seam-audit/standards.md +++ /dev/null @@ -1,14 +0,0 @@ -# Standards - -- Prefer cohesive modules with a single clear responsibility over line-count-only splitting. -- Preserve behavior first; do not mix feature work with the refactor. -- Avoid one-off abstractions. New modules must reflect real ownership boundaries that the codebase can keep using. -- Keep public interfaces stable unless a minimal coordinated update is required. -- Exclude generated, vendored, compiled, snapshot, fixture, and lock files from refactor targets unless explicitly treated as source by the repo. -- Before editing, check for existing local changes in candidate files and avoid overwriting user work. -- Refactor in small, reviewable increments so each target can be validated independently. -- Run the smallest relevant test or lint command after each target, then run the broadest available final validation before finishing. -- If no automated tests exist for a risky seam, add or strengthen focused tests before or during the refactor when practical. -- Keep names, module placement, and imports maintainable and consistent with existing repo conventions. -- Remove dead helpers, obsolete re-exports, and temporary scaffolding introduced during the split. -- Leave tests green. \ No newline at end of file diff --git a/.agent/controllers/keystone-seam-audit/state.toon b/.agent/controllers/keystone-seam-audit/state.toon deleted file mode 100644 index a453fcb..0000000 --- a/.agent/controllers/keystone-seam-audit/state.toon +++ /dev/null @@ -1,25 +0,0 @@ -version: 1 -phase: blocked -goal_status: blocked -goal_revision: 1 -current_step_id: null -iteration: 0 -replan_required: false -completed_steps[0]: -blocked_steps[0]: -last_verification: null -last_cleanup_summary: null -last_full_test_summary: null -history[0]: -notes[1]: No actionable step remained and autonomous replan produced nothing. -planning_session: - pending_question: null - transcript[4]{role,content}: - user,test - assistant,"What should the `keystone-seam-audit` controller actually do? Please specify the target system/repo area, what a \"seam audit\" should inspect, and the expected output artifact(s)." - user,find large files and refactor to smaller files - assistant,Planning completed -started_at: "1775273562" -last_usage_refresh_at: "1775275327" -last_usage_input_tokens: null -last_usage_output_tokens: null \ No newline at end of file diff --git a/.agent/controllers/longview-planner/goal.md b/.agent/controllers/longview-planner/goal.md new file mode 100644 index 0000000..894298c --- /dev/null +++ b/.agent/controllers/longview-planner/goal.md @@ -0,0 +1,4 @@ +## Goal +- Improve the new-task goal planner so every newly created task is processed through a consistent three-stage personality chain: Product Owner, Senior Engineer, and Senior Maintainer. +- Use this chain to strengthen requirement clarity, implementation quality, and long-term iterability before a task enters the execution/verifier loop. +- Enforce planning outputs that bias maintainable design over short-term fixes, reducing the risk of test-passing but brittle implementations. diff --git a/.agent/controllers/longview-planner/plan.toon b/.agent/controllers/longview-planner/plan.toon new file mode 100644 index 0000000..580e4f9 --- /dev/null +++ b/.agent/controllers/longview-planner/plan.toon @@ -0,0 +1,82 @@ +version: 1 +goal_summary: "Implement a structured three-persona planning pipeline for new task creation and wire it into quality gates that prevent spaghetti-like outcomes." +steps[6]: + - id: "step-1" + title: "Map current goal-planning execution path" + purpose: "Identify where new tasks are created and where planner output is generated, stored, and consumed." + notes: This is required to avoid adding persona logic in the wrong layer and breaking existing task creation contracts. + inputs[1]: "Current new-task creation flow" + outputs[1]: Execution map of planner entry/egress points + dependencies[0]: + verification[1]: + - label: Baseline understanding + commands[3]: Trace task creation path from command/API input to persisted task goal,"Locate planner modules, serializers, and validation hooks",Document current persona/prompt hooks (if any) + cleanup_requirements[0]: + status: done + attempts: 1 + - id: "step-2" + title: Reset stale active state + purpose: "Re-run finalization of three-persona planner sequencing before execution resumes" + notes: "Recovered stale active step state required step-2 to be retried before proceeding." + inputs[0]: + outputs[0]: + dependencies[0]: + verification[0]: + cleanup_requirements[0]: + status: done + attempts: 1 + - id: "step-3" + title: Implement sequential persona pipeline in goal planner + purpose: "Apply PO -> Senior Engineer -> Future Engineer transformations and merge outputs into a single maintainability-first goal artifact." + notes: Recovery shows planner execution was reset; this step is now the active unblocked execution path. Controller recovered this step from stale active state and returned it to todo. + inputs[1]: Persona contract and config + outputs[2]: Updated planner pipeline implementation,Deterministic merged goal artifact + dependencies[1]: "step-2" + verification[1]: + - label: Determinism + commands[3]: Add unit tests asserting fixed output ordering and stable merge for same input seed/state,Add tests for conflict resolution and fallback behavior,... 1 more omitted + cleanup_requirements[1]{label,description}: + Safe integration,Remove inline prompt branching that bypasses the new pipeline. + status: done + attempts: 1 + - id: "step-4" + title: "Reopen step-4 implementation" + purpose: Complete remaining implementation work before downstream validation/rollout tasks. + notes: "Recovery flagged step-4 as stale-active; it must be treated as unfinished to keep planner gating chain valid." + inputs[0]: + outputs[0]: + dependencies[0]: + verification[0]: + cleanup_requirements[0]: + status: done + attempts: 1 + - id: "step-5" + title: Add regression tests and rollout docs + purpose: "Validate the three-stage PO→Senior Engineer→Architect flow with deterministic templates, explicit constraints, and maintenance-focused rejection/annotation behavior before proceeding." + notes: "Keep this step as the first blocker: lock in behavioral contracts and anti-bypass coverage, then add concise docs so future work cannot drift into one-off prompts. Controller recovered this step from stale active state and returned it to todo." + inputs[2]: Implemented pipeline and gates,"Three-stage persona templates and merge rules" + outputs[3]: Regression tests for staged transformations,"Tests for downgrade/reject/delta-capture logic that flags maintenance-risk solutions",Developer docs for staged goal generation and constraints + dependencies[1]: "step-4" + verification[1]: + - label: Regression safety + commands[3]: "Run new-task planner unit/integration test set for all 3 passes","Run decision-path tests for reject/downgrade/annotate outcomes","Run existing task-creation and verifier-loop regression tests" + cleanup_requirements[1]{label,description}: + Documentation clarity,"Delete stale references to old single-pass planning behavior." + status: done + attempts: 1 + - id: "step-6" + title: "Enable three-stage persona telemetry and rollout guardrails" + purpose: "Add immutable planner instrumentation and hard enforcement for product-owner, senior-engineer, senior-maintainer stage transitions before execution." + notes: "Implement stage event emission (`Persona stage`, `stage-1`, `stage-2`, `stage-3`) with deterministic persona-pass metadata; block single-pass/bypass paths so every new-task goal goes through all three stages; keep this guardrail work separate from doc/test-generation output paths." + inputs[0]: + outputs[0]: + dependencies[1]: "step-4" + verification[2]: + - label: Stage telemetry contract + commands[2]: "Emit planner session events for every pass including persona template version, constraints, risks, architectural context, and acceptance criteria","Emit explicit `Persona stage` and `stage-1/2/3` transitions from the same approved pipeline path for all new-task goals" + - label: Guardrail enforcement + commands[2]: "Reject execution when single-pass planning or stage skipping is detected","Add rollout counters for rejected/annotated goals by failure type and stage, and alert on threshold breaches" + cleanup_requirements[1]{label,description}: + Operational overhead,"Disable debug-level per-task instrumentation in non-debug environments once the new telemetry/guards are stable." + status: done + attempts: 1 \ No newline at end of file diff --git a/.agent/controllers/longview-planner/standards.md b/.agent/controllers/longview-planner/standards.md new file mode 100644 index 0000000..d48c260 --- /dev/null +++ b/.agent/controllers/longview-planner/standards.md @@ -0,0 +1,10 @@ +## Standards +- Preserve and improve maintainability by requiring architecture-aware task goals, explicit constraints, and explicit iteration context. +- Keep persona behavior deterministic with versioned templates, ordered composition, and stable merge rules. +- Reject, downgrade, or flag goals that optimize correctness only and create high future maintenance cost. +- Avoid one-off hacks and ad-hoc prompt bypasses; route all goal generation through the same approved planner flow. +- Add lightweight observability for each persona transformation and keep existing verification and tests green. +- Stage goal generation must follow the exact order: product-owner, senior-engineer, senior-maintainer. +- Emit all stage transitions as planner session events (`Persona stage`, `stage-1..3`) and include persona pass metadata. +- Enforce persona-pass contracts with explicit constraints, risks, and architecture-aware acceptance criteria in every stage. +- Keep one-pass behavior removed from rollout docs: single-pass planning is no longer an accepted path for new tasks. diff --git a/.agent/controllers/longview-planner/state.toon b/.agent/controllers/longview-planner/state.toon new file mode 100644 index 0000000..9928623 --- /dev/null +++ b/.agent/controllers/longview-planner/state.toon @@ -0,0 +1,49 @@ +version: 1 +phase: done +stop_reason: null +goal_status: done +goal_revision: 1 +current_step_id: null +iteration: 8 +replan_required: true +completed_steps[8]: "step-1","step-2","step-2","step-3","step-4","step-4","step-5","step-6" +blocked_steps[0]: +last_verification: + passed: true + summary: No commands requested + commands[0]: + output[0]: +last_cleanup_summary: + passed: true + summary: "Cleanup accepted for step-6" + commands[0]: + output[5]: Did not run verification or tests per your execution constraints.,"Single-pass legacy schema outputs are still parsed for compatibility but are rejected by the new pipeline contract check before acceptance.","If you want, next step is to add targeted tests for the new counter/threshold paths in forwarder.","",next_step_not_required +last_full_test_summary: + passed: true + summary: No commands requested + commands[0]: + output[0]: +history[8]{timestamp,kind,detail}: + "1775299756","step-complete","Completed step-1" + "1775299853","step-complete","Completed step-2" + "1775299897","step-complete","Completed step-2" + "1775300068","step-complete","Completed step-3" + "1775300173","step-complete","Completed step-4" + "1775300488","step-complete","Completed step-4" + "1775300610","step-complete","Completed step-5" + "1775300806","step-complete","Completed step-6" +notes[4]: "Recovered stale active step state for longview-planner. Reset step-2 to todo.","Recovered stale active step state for longview-planner. Reset step-3 to todo.","Recovered stale active step state for longview-planner. Reset step-4 to todo.","Recovered stale active step state for longview-planner. Reset step-5 to todo." +planning_session: + pending_question: null + transcript[2]{role,content}: + user,"Improve the goal planner when creating a new task. It should pass through a PO personality, a senior engineer personality, a engineer who works on the codebase in 2 years. This is to improve code and prevent the loop from writing spaghetti code that passes verification and tests barely, but is unable to be iterated upon" + assistant,Planning completed +started_at: "1775299715" +last_usage_refresh_at: "1775300358" +last_usage_input_tokens: null +last_usage_output_tokens: null +last_usage_primary_window: null +last_usage_secondary_window: null +run_model: "gpt-5.3-codex-spark" +fast_mode: true +allow_branching: false \ No newline at end of file diff --git a/.agent/controllers/module-mosaic/goal.md b/.agent/controllers/module-mosaic/goal.md deleted file mode 100644 index 480f044..0000000 --- a/.agent/controllers/module-mosaic/goal.md +++ /dev/null @@ -1,5 +0,0 @@ -# Goal - -Refactor the oversized Rust modules in this repository into smaller, focused directory modules without changing runtime behavior, controller flow, or persisted controller file formats. - -Prioritize `src/ui/mod.rs`, `src/model.rs`, `src/process.rs`, `src/storage/toon.rs`, `src/app/workspace_input.rs`, and `src/app/runtime.rs`. End with thin `mod.rs` facades, clear ownership boundaries, stable public APIs or `pub use` reexports where they reduce churn, colocated tests for the moved logic, and a green Rust verification pass. \ No newline at end of file diff --git a/.agent/controllers/module-mosaic/plan.toon b/.agent/controllers/module-mosaic/plan.toon deleted file mode 100644 index d7aafeb..0000000 --- a/.agent/controllers/module-mosaic/plan.toon +++ /dev/null @@ -1,130 +0,0 @@ -version: 6 -goal_summary: "Refactor the remaining oversized Rust modules into focused directory modules with thin facades, stable public APIs, colocated tests, unchanged controller behavior and persisted formats, then finish with full Rust verification." -steps[8]: - - id: guardrails - title: Add Refactor Guardrails - purpose: Lock down the current behavior that must survive file moves before changing module structure. - notes: "Completed. Guardrail coverage exists for model, process, storage, app, and UI behavior, including serialized cwd-sensitive TOON tests and the cached session-view shape used by app/UI call sites." - inputs[7]: src/model.rs,src/process.rs,src/storage/toon.rs,src/app/tests.rs,src/ui/mod.rs,src/app/session.rs,src/app/mod.rs - outputs[5]: Focused tests covering plan/state/schema behavior in the model boundary,"Focused tests covering codex event parsing, usage snapshots, and stderr filtering","Focused tests covering TOON roundtrips, controller discovery, controller id normalization, and cwd-sensitive discovery flows","Focused app tests covering planning command gating, scrolling, and submission behavior","Focused UI tests covering screen rendering, wrapping, and session selection extraction" - dependencies[0]: - verification[1]: - - label: Targeted guardrail tests - commands[5]: "cargo test -q model::tests","cargo test -q process::tests","cargo test -q storage::toon::tests","cargo test -q app::tests","cargo test -q ui::tests" - cleanup_requirements[2]{label,description}: - Colocate new tests,"Keep each new or moved test with the module that owns the behavior instead of adding another catch-all test file." - Avoid duplicate fixtures,Reuse existing sample app and model fixtures where possible so the refactor does not create parallel test scaffolding. - status: done - attempts: 1 - - id: "model-modules" - title: Split Shared Model Types - purpose: "Refactor `src/model.rs` into focused submodules while preserving the existing `crate::model::*` surface." - notes: "Completed. `src/model.rs` is now a directory facade with focused submodules, stable reexports, and colocated tests protecting schemas, plan helpers, session views, and response types." - inputs[1]: src/model.rs - outputs[8]: src/model/mod.rs facade reexporting the stable public model API,"src/model/controller.rs for screen, phase, goal status, step status, and controller state types","src/model/plan.rs for task config, plan structs, and plan mutation helpers","src/model/session.rs for session enums, grouping, cursor, cached session-view, and selection helpers",src/model/usage.rs for usage and status snapshot types,"src/model/response.rs for planner, executor, and controller summary response types",src/model/schema.rs for JSON schema builders,Model tests moved beside their owning submodules - dependencies[1]: guardrails - verification[1]: - - label: Model regression tests - commands[6]: "cargo test -q model::controller::tests","cargo test -q model::plan::tests","cargo test -q model::session::tests","cargo test -q model::schema::tests","cargo test -q model::usage::tests","cargo test -q" - cleanup_requirements[3]{label,description}: - Thin facade,Leave `src/model/mod.rs` as a reexport surface rather than reintroducing large inline implementations there. - Serde and schema parity,"Keep existing serde attributes, defaults, and JSON schema output unchanged while splitting the code." - No stale aliases,"Remove transitional imports or compatibility types that only mirror the old single-file layout once the facade exports are wired." - status: done - attempts: 3 - - id: "process-modules" - title: Split Process Execution And Parsing - purpose: "Refactor `src/process.rs` into focused modules for codex execution, shell execution, usage snapshots, and event parsing." - notes: "Completed. `src/process.rs` was replaced with focused modules and a stable facade. Public entry points remain unchanged, targeted parser and usage tests pass, and no new process-specific warnings were introduced." - inputs[2]: src/process.rs,src/model/mod.rs - outputs[6]: src/process/mod.rs facade preserving the current public functions,src/process/codex.rs for `run_codex_with_schema` and controller id generation,src/process/shell.rs for shell command execution and command summaries,src/process/usage.rs for usage snapshot helpers,src/process/parser.rs for codex JSON line parsing and rendering helpers,Process parsing tests kept with the parser and usage modules - dependencies[2]: guardrails,"model-modules" - verification[1]: - - label: Process regression tests - commands[3]: "cargo test -q process::parser::tests","cargo test -q process::usage::tests","cargo test -q" - cleanup_requirements[3]{label,description}: - Stable process API,"Preserve the current `crate::process::*` entry points so controller and app call sites stay unchanged." - No parsing drift,"Keep command, tool, thinking, usage, and stderr event behavior identical after extraction." - Cull moved dead code,Delete obsolete inline helpers in the old file instead of leaving duplicate parser or usage logic behind. - status: done - attempts: 1 - - id: "storage-toon-modules" - title: Split TOON Persistence Helpers - purpose: "Refactor `src/storage/toon.rs` into focused persistence, discovery, codec, and id helper modules without changing file formats." - notes: "Completed. `src/storage/toon.rs` is now a focused directory module with stable facade exports, shared codec helpers, preserved cwd-sensitive discovery behavior, and passing targeted plus full test runs." - inputs[2]: src/storage/toon.rs,src/model/mod.rs - outputs[6]: src/storage/toon/mod.rs facade preserving the current storage API,src/storage/toon/files.rs for controller file creation and markdown read/write helpers,src/storage/toon/codec.rs for shared TOON read and write helpers,"src/storage/toon/controllers.rs for controller creation, listing, summaries, discovery, and timestamp helpers","src/storage/toon/ids.rs for normalization, uniqueness, fallback, and suffix helpers","Storage tests split between codec, discovery, and id modules" - dependencies[2]: guardrails,"model-modules" - verification[1]: - - label: Storage regression tests - commands[5]: "cargo test -q storage::toon::codec::tests","cargo test -q storage::toon::controllers::tests","cargo test -q storage::toon::ids::tests","cargo test -q storage::toon::tests","cargo test -q" - cleanup_requirements[3]{label,description}: - No format drift,"Keep persisted markdown and TOON content shape byte-compatible except for harmless formatting already produced by existing helpers." - Single codec path,Route all TOON encoding and decoding through shared codec helpers instead of leaving duplicate inline file logic. - "Preserve cwd-sensitive tests","Keep the existing cwd mutex and discovery test discipline intact so file-system behavior stays deterministic." - status: done - attempts: 1 - - id: "app-runtime-modules" - title: Split App Runtime Lifecycle - purpose: "Refactor `src/app/runtime.rs` into focused modules for workspace lifecycle, runtime events, and usage refresh while keeping `App` behavior stable." - notes: "Completed. `src/app/runtime.rs` now routes through focused runtime modules with stable `impl App` entry points, expanded colocated tests, and a clean repository-wide verification pass after extraction." - inputs[5]: src/app/runtime.rs,src/app/mod.rs,src/process/mod.rs,src/storage/toon/mod.rs,src/app/session.rs - outputs[5]: src/app/runtime/mod.rs coordinating the runtime submodules,"src/app/runtime/workspace.rs for open, create, load, picker refresh, and shutdown flows",src/app/runtime/events.rs for draining and applying runtime events plus local session entry helpers,src/app/runtime/usage.rs for usage refresh and state persistence,"Expanded app tests covering event application, usage refresh, workspace open flows, and cached-session reconstruction" - dependencies[4]: guardrails,"model-modules","process-modules","storage-toon-modules" - verification[1]: - - label: App runtime regression tests - commands[5]: "cargo test -q app::runtime::events::tests","cargo test -q app::runtime::usage::tests","cargo test -q app::runtime::workspace::tests","cargo test -q app::tests","cargo test -q" - cleanup_requirements[3]{label,description}: - Stable App methods,Keep the current `impl App` method names and external call sites intact while moving their bodies into submodules. - Single hydration path,Avoid duplicating workspace state initialization and usage snapshot reconstruction across runtime modules. - "Contain runtime-only logic",Do not let runtime extraction leak storage or process implementation details into unrelated app modules. - status: done - attempts: 1 - - id: "app-workspace-input-modules" - title: Split Workspace Input Handling - purpose: "Refactor `src/app/workspace_input.rs` into focused keyboard, mouse, command, and submission modules while preserving interaction behavior." - notes: "Next execution step. Runtime seams are now stable, so extract interaction logic by ownership boundary while keeping slash commands, planning-mode gating, drag selection, scroll behavior, follow-output resets, warning text, and submission side effects unchanged. Controller recovered this step from stale active state and returned it to todo." - inputs[5]: src/app/workspace_input.rs,src/app/mod.rs,src/app/tests.rs,src/ui/mod.rs,src/app/runtime/mod.rs - outputs[6]: src/app/workspace_input/mod.rs coordinating workspace input entry points,"src/app/workspace_input/mouse.rs for selection, drag, and wheel handling",src/app/workspace_input/keyboard.rs for key dispatch and navigation,src/app/workspace_input/commands.rs for slash command handling and planning mode gating,src/app/workspace_input/submission.rs for user message submission and local session entry creation,"Expanded colocated tests covering slash commands, selection, drag, scroll, follow-output reset behavior, and submission ordering" - dependencies[2]: guardrails,"app-runtime-modules" - verification[1]: - - label: Workspace input regression tests - commands[4]: "cargo test -q app::workspace_input::commands::tests","cargo test -q app::workspace_input::submission::tests","cargo test -q app::tests","cargo test -q" - cleanup_requirements[3]{label,description}: - Keep command strings stable,"Do not change existing slash commands, warning text, or planning-mode restrictions during the split." - No duplicated reset logic,"Centralize selection and follow-output resets instead of copying the same workspace cleanup code into each input module." - Preserve local entry creation,Keep local session entry generation and submission ordering identical so UI and persistence behavior do not drift. - status: active - attempts: 1 - - id: "ui-modules" - title: Split UI Rendering Helpers - purpose: Refactor `src/ui/mod.rs` into focused rendering modules while preserving the current exported helpers and TUI behavior. - notes: "Start after workspace input extraction settles the app-facing seams. Keep `src/ui/mod.rs` as a thin facade, preserve wrapping and selection math exactly, and remove the dead `session_row_cells` helper or relocate its behavior into the owning session-rendering module so no dead-code warning survives." - inputs[5]: src/ui/mod.rs,src/ui/scroll.rs,src/app/mod.rs,src/model/mod.rs,src/app/workspace_input/mod.rs - outputs[9]: src/ui/mod.rs thin facade exporting the stable UI entry points,"src/ui/theme.rs for colors, shared styles, and shell block helpers","src/ui/layout.rs for `WorkspaceLayout`, `SessionView`, and layout calculations",src/ui/picker.rs for controller picker rendering,"src/ui/create_controller.rs for create-controller screen rendering",src/ui/workspace.rs for workspace screen orchestration,"src/ui/session.rs for session row rendering, wrapping, and selection extraction helpers","src/ui/sidebar.rs for plan board, status line, and composer helper rendering",UI tests updated to target the new owning modules without changing rendered behavior - dependencies[4]: guardrails,"model-modules","app-runtime-modules","app-workspace-input-modules" - verification[1]: - - label: UI regression tests - commands[4]: "cargo test -q ui::layout::tests","cargo test -q ui::session::tests","cargo test -q ui::tests","cargo test -q" - cleanup_requirements[3]{label,description}: - Thin facade,Keep `src/ui/mod.rs` limited to module wiring and stable reexports instead of leaving business logic there. - Preserve selection math,"Keep wrapping, selection clipping, and copied session text behavior identical after extraction." - Remove refactor leftovers,Delete dead rendering helpers and stale imports introduced or exposed by the module split. - status: todo - attempts: 0 - - id: "final-integration" - title: Run Final Cleanup And Verification - purpose: "Reconcile imports and module wiring, remove leftover compatibility code, and run the full repository quality gate." - notes: "Final pass after the remaining app and UI splits. Clean wiring and refactor leftovers, confirm that stable APIs remain intact, and rerun the full required verification set after any final lint-driven cleanup that does not change behavior." - inputs[7]: src/model/mod.rs,src/process/mod.rs,src/storage/toon/mod.rs,src/app/runtime/mod.rs,src/app/workspace_input/mod.rs,src/ui/mod.rs,Cargo.toml - outputs[3]: Updated module declarations and imports across `src/`,"Removed dead helpers, stale imports, and compatibility shims left from the large-file split","Green formatting, test, and clippy verification for the refactor" - dependencies[6]: "model-modules","process-modules","storage-toon-modules","app-runtime-modules","app-workspace-input-modules","ui-modules" - verification[1]: - - label: Full verification - commands[3]: "cargo fmt --check","cargo test -q","cargo clippy -q --all-targets --all-features" - cleanup_requirements[3]{label,description}: - Remove leftovers,Delete obsolete inline helpers and transitional reexports once the new module structure is wired in cleanly. - Keep structure intentional,"Do not leave empty modules or one-off directories after the refactor is complete." - No warning regressions,"Do not introduce new dead-code or stale-import warnings as part of the refactor cleanup." - status: todo - attempts: 0 \ No newline at end of file diff --git a/.agent/controllers/module-mosaic/standards.md b/.agent/controllers/module-mosaic/standards.md deleted file mode 100644 index 1a4ebb5..0000000 --- a/.agent/controllers/module-mosaic/standards.md +++ /dev/null @@ -1,9 +0,0 @@ -# Standards - -- Preserve existing behavior, controller orchestration, TUI interactions, and on-disk `.md` and `.toon` controller formats throughout the refactor. -- Prefer focused directory modules when a file mixes responsibilities or grows past roughly 300 lines, and keep `mod.rs` files as thin facades or reexport surfaces. -- Keep public call sites stable unless a narrower API is clearly better, using `pub use` reexports to avoid unnecessary churn. -- Split code by ownership boundary: model/state/schema concerns, process execution and parsing, TOON persistence and controller discovery, app runtime lifecycle, workspace input handling, and UI rendering helpers. -- Move or add focused tests with the code they protect, especially around model schemas, session grouping and selection, process parsing, storage discovery and id generation, runtime event handling, workspace commands, and UI rendering helpers. -- Remove dead helpers, stale imports, and compatibility layers that only mirror the old file layout. -- Finish with `cargo fmt --check`, `cargo test -q`, and `cargo clippy -q --all-targets --all-features` passing. \ No newline at end of file diff --git a/.agent/controllers/module-mosaic/state.toon b/.agent/controllers/module-mosaic/state.toon deleted file mode 100644 index ef61de1..0000000 --- a/.agent/controllers/module-mosaic/state.toon +++ /dev/null @@ -1,43 +0,0 @@ -version: 1 -phase: executing -stop_reason: null -goal_status: "in-progress" -goal_revision: 2 -current_step_id: null -iteration: 7 -replan_required: false -completed_steps[5]: guardrails,"model-modules","process-modules","storage-toon-modules","app-runtime-modules" -blocked_steps[0]: -last_verification: - passed: true - summary: All commands passed - commands[3]: "cargo fmt --check","cargo test -q","cargo clippy -q --all-targets --all-features" - output[1]: "running 65 tests\n.................................................................\ntest result: ok. 65 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.01s" -last_cleanup_summary: - passed: true - summary: "Cleanup accepted for app-runtime-modules" - commands[0]: - output[3]: Kept runtime behavior stable by preserving the existing `impl App` entry points in `src/app/runtime/mod.rs` and moving only the implementation bodies behind module boundaries.,"Added focused runtime guardrail coverage in `src/app/runtime/events.rs`, `src/app/runtime/usage.rs`, and `src/app/runtime/workspace.rs` instead of expanding the catch-all app test file.","Folded in low-risk cleanup needed for a clean verification pass: collapsed the app event-poll branch in `src/app/mod.rs`, elided a needless lifetime in `src/process/parser.rs`, marked the UI-only helper in `src/ui/mod.rs` as test-only, and explicitly allowed the existing `AppEvent` enum layout instead of changing runtime payload behavior." -last_full_test_summary: - passed: true - summary: All commands passed - commands[4]: "cargo test -q app::runtime::events::tests","cargo test -q app::runtime::usage::tests","cargo test -q app::runtime::workspace::tests","cargo test -q app::tests" - output[4]: "running 3 tests\n...\ntest result: ok. 3 passed; 0 failed; 0 ignored; 0 measured; 62 filtered out; finished in 0.00s","running 2 tests\n..\ntest result: ok. 2 passed; 0 failed; 0 ignored; 0 measured; 63 filtered out; finished in 0.00s","running 1 test\n.\ntest result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 64 filtered out; finished in 0.00s","running 6 tests\n......\ntest result: ok. 6 passed; 0 failed; 0 ignored; 0 measured; 59 filtered out; finished in 0.00s" -history[5]{timestamp,kind,detail}: - "1775277691","step-complete",Completed guardrails - "1775278850","step-complete","Completed model-modules" - "1775279170","step-complete","Completed process-modules" - "1775279529","step-complete","Completed storage-toon-modules" - "1775279938","step-complete","Completed app-runtime-modules" -notes[8]: No actionable step remained and autonomous replan produced nothing.,"Recovered stale active step state for module-mosaic. Reset model-modules to todo.","Recovered stale active step state for module-mosaic. Reset model-modules to todo.",No actionable step remained and autonomous replan produced nothing.,"Recovered stale active step state for module-mosaic. Reset process-modules to todo.","Recovered stale active step state for module-mosaic. Reset storage-toon-modules to todo.","Recovered stale active step state for module-mosaic. Reset app-runtime-modules to todo.","Recovered stale active step state for module-mosaic. Reset app-workspace-input-modules to todo." -planning_session: - pending_question: null - transcript[4]{role,content}: - user,refactor large files to smaller more maintainable files - assistant,Planning completed - user,refactor large files - assistant,Planning completed -started_at: "1775275504" -last_usage_refresh_at: "1775280021" -last_usage_input_tokens: null -last_usage_output_tokens: null \ No newline at end of file diff --git a/.agent/controllers/runtime-open/goal.md b/.agent/controllers/runtime-open/goal.md deleted file mode 100644 index 1315b20..0000000 --- a/.agent/controllers/runtime-open/goal.md +++ /dev/null @@ -1,3 +0,0 @@ -# Goal - -Describe the goal for this controller. diff --git a/.agent/controllers/runtime-open/plan.toon b/.agent/controllers/runtime-open/plan.toon deleted file mode 100644 index c0524ef..0000000 --- a/.agent/controllers/runtime-open/plan.toon +++ /dev/null @@ -1,3 +0,0 @@ -version: 1 -goal_summary: No plan yet -steps[0]: \ No newline at end of file diff --git a/.agent/controllers/runtime-open/standards.md b/.agent/controllers/runtime-open/standards.md deleted file mode 100644 index e7b6088..0000000 --- a/.agent/controllers/runtime-open/standards.md +++ /dev/null @@ -1,5 +0,0 @@ -# Standards - -- Keep code maintainable. -- Avoid one-off hacks. -- Leave tests green. diff --git a/.agent/controllers/runtime-open/state.toon b/.agent/controllers/runtime-open/state.toon deleted file mode 100644 index 90afb73..0000000 --- a/.agent/controllers/runtime-open/state.toon +++ /dev/null @@ -1,22 +0,0 @@ -version: 1 -phase: planning -stop_reason: null -goal_status: unknown -goal_revision: 0 -current_step_id: null -iteration: 0 -replan_required: false -completed_steps[0]: -blocked_steps[0]: -last_verification: null -last_cleanup_summary: null -last_full_test_summary: null -history[0]: -notes[0]: -planning_session: - pending_question: null - transcript[0]: -started_at: null -last_usage_refresh_at: null -last_usage_input_tokens: null -last_usage_output_tokens: null \ No newline at end of file diff --git a/.agent/controllers/teamwise-prompt-lens/goal.md b/.agent/controllers/teamwise-prompt-lens/goal.md deleted file mode 100644 index d41ad97..0000000 --- a/.agent/controllers/teamwise-prompt-lens/goal.md +++ /dev/null @@ -1,12 +0,0 @@ -# Goal - -Turn rough user prompts entered into the controller goal planner into clear, production-quality improvement briefs shaped by a cross-functional software team. - -The controller should: -- reinterpret ambiguous or sloppy requests through the perspectives of an architect, product owner, senior engineer, QA engineer, and other relevant software roles; -- surface missing context, risks, constraints, edge cases, and acceptance criteria before execution begins; -- rewrite the original request into a coherent codebase-improvement prompt that is specific, technically credible, and ready for autonomous planning or implementation; -- prefer maintainable, incremental improvements over novelty or one-off solutions; -- produce outputs that help downstream agents make sound architectural, implementation, testing, and rollout decisions with minimal back-and-forth. - -Success means a weak initial prompt becomes a well-scoped, team-reviewed execution brief with explicit goals, assumptions, constraints, risks, and verification expectations. \ No newline at end of file diff --git a/.agent/controllers/teamwise-prompt-lens/plan.toon b/.agent/controllers/teamwise-prompt-lens/plan.toon deleted file mode 100644 index 8a6f0dd..0000000 --- a/.agent/controllers/teamwise-prompt-lens/plan.toon +++ /dev/null @@ -1,89 +0,0 @@ -version: 1 -goal_summary: "Define a team-oriented planning controller that transforms rough prompts into implementation-ready improvement briefs using cross-functional software perspectives." -steps[6]: - - id: "step-01" - title: Audit Current Controller Artifacts - purpose: "Inspect the existing goal, standards, plan, and state files to replace placeholders and preserve any useful structure." - notes: "The current controller files are placeholder-heavy and need concrete intent before automation can rely on them. One or more commands failed" - inputs[4]: ".agent/controllers/teamwise-prompt-lens/goal.md",".agent/controllers/teamwise-prompt-lens/standards.md",".agent/controllers/teamwise-prompt-lens/plan.toon",".agent/controllers/teamwise-prompt-lens/state.toon" - outputs[3]: Confirmed file inventory,List of placeholder content to replace,Any existing TOON structure worth preserving - dependencies[0]: - verification[1]: - - label: Read current controller files - commands[4]: "sed -n '1,200p' .agent/controllers/teamwise-prompt-lens/goal.md","sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/standards.md","sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/plan.toon","sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon" - cleanup_requirements[1]{label,description}: - No stale placeholders,Remove generic placeholder text once the real controller intent is documented. - status: active - attempts: 5 - - id: "step-02" - title: "Define Cross-Functional Prompt Lens" - purpose: "Specify the software-team roles and the exact review dimensions each role contributes to prompt improvement." - notes: The controller needs explicit personas so it consistently upgrades prompts instead of producing generic rewrites. - inputs[2]: "User request for architect, QA, senior engineer, product owner, and broader team input","Findings from step-01" - outputs[3]: Role list for the prompt lens,"Per-role review criteria",Rules for when to include or omit additional roles - dependencies[1]: "step-01" - verification[1]: - - label: Check role coverage in artifacts - commands[1]: "rg -n \"architect|product|senior engineer|qa|operations|security|performance\" .agent/controllers/teamwise-prompt-lens" - cleanup_requirements[1]{label,description}: - Avoid role sprawl,Keep the persona set opinionated and reusable rather than listing every possible specialty. - status: todo - attempts: 0 - - id: "step-03" - title: Rewrite Goal And Standards - purpose: "Replace the placeholder Markdown with controller-specific guidance that matches the desired teamwise prompt transformation behavior." - notes: The goal and standards must be explicit because downstream planning quality depends on them. - inputs[2]: "Outputs from step-01","Outputs from step-02" - outputs[2]: "Updated .agent/controllers/teamwise-prompt-lens/goal.md","Updated .agent/controllers/teamwise-prompt-lens/standards.md" - dependencies[1]: "step-02" - verification[1]: - - label: Validate rewritten Markdown content - commands[3]: "sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/goal.md","sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/standards.md","rg -n \"Describe the goal for this controller|placeholder|TODO\" .agent/controllers/teamwise-prompt-lens/goal.md .agent/controllers/teamwise-prompt-lens/standards.md" - cleanup_requirements[1]{label,description}: - Keep standards actionable,Remove vague quality slogans unless they imply a concrete execution rule. - status: todo - attempts: 0 - - id: "step-04" - title: Author Planner Workflow In TOON - purpose: "Encode the planning workflow so the controller consistently turns sloppy prompts into structured, execution-ready briefs." - notes: The main behavioral logic belongs in the plan file because the controller is operating in planning mode. - inputs[2]: Rewritten goal and standards,"Cross-functional prompt lens definition" - outputs[2]: "Updated .agent/controllers/teamwise-prompt-lens/plan.toon with ordered planning behavior","Explicit output sections for rewritten prompt, assumptions, risks, acceptance criteria, and verification" - dependencies[1]: "step-03" - verification[1]: - - label: Review plan structure - commands[2]: "sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/plan.toon","rg -n \"assumptions|risks|acceptance criteria|verification|rewrite|team\" .agent/controllers/teamwise-prompt-lens/plan.toon" - cleanup_requirements[1]{label,description}: - No dead branches,Remove unused workflow branches or duplicate instructions that would confuse autonomous execution. - status: todo - attempts: 0 - - id: "step-05" - title: Initialize Stateful Planning Data - purpose: "Define the minimal controller state needed to track prompt quality, assumptions, open questions, and plan readiness across runs." - notes: State should stay minimal so the controller remains predictable and maintainable. - inputs[2]: "Planner workflow from step-04","Existing .agent/controllers/teamwise-prompt-lens/state.toon" - outputs[2]: "Updated .agent/controllers/teamwise-prompt-lens/state.toon","Stable state fields for prompt intake, role synthesis, assumptions, risks, and completion status" - dependencies[1]: "step-04" - verification[1]: - - label: Inspect state schema - commands[2]: "sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon","rg -n \"prompt|assumption|risk|question|ready|status\" .agent/controllers/teamwise-prompt-lens/state.toon" - cleanup_requirements[1]{label,description}: - Avoid overspecified state,Remove transient or redundant fields that do not support repeated planning runs. - status: todo - attempts: 0 - - id: "step-06" - title: Validate With Representative Prompt Cases - purpose: Check that the controller can upgrade rough prompts into clearer briefs without losing user intent. - notes: A few realistic examples are the fastest way to catch missing sections or overcomplicated output rules. - inputs[2]: Updated controller artifacts,Representative sloppy prompts about codebase improvements - outputs[2]: Validation notes,"Any final wording adjustments to goal, standards, plan, or state" - dependencies[1]: "step-05" - verification[2]: - - label: Run artifact review against sample prompts - commands[4]: "sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/goal.md","sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/standards.md","sed -n '1,320p' .agent/controllers/teamwise-prompt-lens/plan.toon","sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/state.toon" - - label: Final placeholder sweep - commands[1]: "rg -n \"TODO|placeholder|Describe the goal for this controller|TBD\" .agent/controllers/teamwise-prompt-lens" - cleanup_requirements[1]{label,description}: - Remove ad hoc examples,"Do not leave validation-only sample prompts in production controller files unless intentionally documented." - status: todo - attempts: 0 \ No newline at end of file diff --git a/.agent/controllers/teamwise-prompt-lens/standards.md b/.agent/controllers/teamwise-prompt-lens/standards.md deleted file mode 100644 index be7e4c2..0000000 --- a/.agent/controllers/teamwise-prompt-lens/standards.md +++ /dev/null @@ -1,13 +0,0 @@ -# Standards - -- Treat every incoming prompt as incomplete until assumptions, constraints, and success criteria are made explicit. -- Synthesize perspectives from architecture, product, engineering, QA, and operations when they materially affect the outcome. -- Optimize for maintainable codebase improvements, not clever one-off patches. -- Preserve the user's core intent while upgrading precision, scope control, and technical quality. -- Make missing information visible as assumptions or open questions instead of silently inventing product or system behavior. -- Require clear deliverables, acceptance criteria, and verification expectations in the rewritten prompt. -- Call out risks, dependencies, migration concerns, and likely regression areas when relevant. -- Keep outputs concise enough for autonomous execution, but complete enough to avoid avoidable follow-up. -- Prefer incremental, reviewable changes that can keep tests green throughout execution. -- Eliminate placeholder language, vague directives, and non-actionable advice from controller artifacts. -- Leave tests green. \ No newline at end of file diff --git a/.agent/controllers/teamwise-prompt-lens/state.toon b/.agent/controllers/teamwise-prompt-lens/state.toon deleted file mode 100644 index 0af9d6b..0000000 --- a/.agent/controllers/teamwise-prompt-lens/state.toon +++ /dev/null @@ -1,30 +0,0 @@ -version: 1 -phase: executing -stop_reason: null -goal_status: "in-progress" -goal_revision: 1 -current_step_id: "step-01" -iteration: 5 -replan_required: false -completed_steps[0]: -blocked_steps[0]: -last_verification: - passed: false - summary: One or more commands failed - commands[4]: "sed -n '1,140p' .agent/controllers/teamwise-prompt-lens/plan.toon","sed -n '1,220p' .agent/controllers/teamwise-prompt-lens/state.toon","rg -n \"placeholder-heavy|Replace the placeholder Markdown|TODO|Describe the goal for this controller|TBD\" .agent/controllers/teamwise-prompt-lens | rg -v -F \"commands[\"","git status --short .agent/controllers/teamwise-prompt-lens" - output[2]: "version: 1\ngoal_summary: \"Define a team-oriented planning controller that transforms rough prompts into implementation-ready improvement briefs using cross-functional software perspectives.\"\nsteps[6]:\n - id: \"step-01\"\n title: Audit Current Controller Artifacts\n purpose: \"Inspect the existing goal, standards, plan, and state files to confirm inventory, identify stale metadata, and preserve any useful structure.\"\n notes: \"Completed. Audit confirmed all four controller artifacts exist. `goal.md` and `standards.md` already capture the requested cross-functional prompt-upgrade intent; stale content is limited to plan metadata that still assumed placeholder markdown and a state file that had not yet recorded audit outputs.\"\n inputs[4]: \".agent/controllers/teamwise-prompt-lens/goal.md\",\".agent/controllers/teamwise-prompt-lens/standards.md\",\".agent/controllers/teamwise-prompt-lens/plan.toon\",\".agent/controllers/teamwise-prompt-lens/state.toon\"\n outputs[3]: Confirmed file inventory,List of placeholder content to replace,Any existing TOON structure worth preserving\n dependencies[0]:\n verification[1]:\n - label: Read current controller files\n commands[4]: \"sed -n '1,200p' .agent/controllers/teamwise-prompt-lens/goal.md\",\"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/standards.md\",\"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/plan.toon\",\"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon\"\n cleanup_requirements[1]{label,description}:\n No stale placeholders,Remove generic placeholder text once the real controller intent is documented.\n status: done\n attempts: 3\n - id: \"step-02\"\n title: \"Define Cross-Functional Prompt Lens\"\n purpose: \"Specify the software-team roles and the exact review dimensions each role contributes to prompt improvement.\"\n notes: The controller needs explicit personas so it consistently upgrades prompts instead of producing generic rewrites.\n inputs[2]: \"User request for architect, QA, senior engineer, product owner, and broader team input\",\"Findings from step-01\"\n outputs[3]: Role list for the prompt lens,\"Per-role review criteria\",Rules for when to include or omit additional roles\n dependencies[1]: \"step-01\"\n verification[1]:\n - label: Check role coverage in artifacts\n commands[1]: \"rg -n \\\"architect|product|senior engineer|qa|operations|security|performance\\\" .agent/controllers/teamwise-prompt-lens\"\n cleanup_requirements[1]{label,description}:\n Avoid role sprawl,Keep the persona set opinionated and reusable rather than listing every possible specialty.\n status: todo\n attempts: 0\n - id: \"step-03\"\n title: Refine Goal And Standards\n purpose: \"Tighten the existing Markdown into controller-specific guidance that matches the desired teamwise prompt transformation behavior.\"\n notes: The current Markdown is directionally correct, but it still needs sharper controller-specific wording and explicit cross-functional review rules before downstream planning can rely on it.\n inputs[2]: \"Outputs from step-01\",\"Outputs from step-02\"\n outputs[2]: \"Updated .agent/controllers/teamwise-prompt-lens/goal.md\",\"Updated .agent/controllers/teamwise-prompt-lens/standards.md\"\n dependencies[1]: \"step-02\"\n verification[1]:\n - label: Validate rewritten Markdown content\n commands[3]: \"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/goal.md\",\"sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/standards.md\",\"rg -n \\\"Describe the goal for this controller|TODO|TBD\\\" .agent/controllers/teamwise-prompt-lens/goal.md .agent/controllers/teamwise-prompt-lens/standards.md\"\n cleanup_requirements[1]{label,description}:\n Keep standards actionable,Remove vague quality slogans unless they imply a concrete execution rule.\n status: todo\n attempts: 0\n - id: \"step-04\"\n title: Author Planner Workflow In TOON\n purpose: \"Encode the planning workflow so the controller consistently turns sloppy prompts into structured, execution-ready briefs.\"\n notes: The main behavioral logic belongs in the plan file because the controller is operating in planning mode.\n inputs[2]: Rewritten goal and standards,\"Cross-functional prompt lens definition\"\n outputs[2]: \"Updated .agent/controllers/teamwise-prompt-lens/plan.toon with ordered planning behavior\",\"Explicit output sections for rewritten prompt, assumptions, risks, acceptance criteria, and verification\"\n dependencies[1]: \"step-03\"\n verification[1]:\n - label: Review plan structure\n commands[2]: \"sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/plan.toon\",\"rg -n \\\"assumptions|risks|acceptance criteria|verification|rewrite|team\\\" .agent/controllers/teamwise-prompt-lens/plan.toon\"\n cleanup_requirements[1]{label,description}:\n No dead branches,Remove unused workflow branches or duplicate instructions that would confuse autonomous execution.\n status: todo\n attempts: 0\n - id: \"step-05\"\n title: Initialize Stateful Planning Data\n purpose: \"Define the minimal controller state needed to track prompt quality, assumptions, open questions, and plan readiness across runs.\"\n notes: State should stay minimal so the controller remains predictable and maintainable.\n inputs[2]: \"Planner workflow from step-04\",\"Existing .agent/controllers/teamwise-prompt-lens/state.toon\"\n outputs[2]: \"Updated .agent/controllers/teamwise-prompt-lens/state.toon\",\"Stable state fields for prompt intake, role synthesis, assumptions, risks, and completion status\"\n dependencies[1]: \"step-04\"\n verification[1]:\n - label: Inspect state schema\n commands[2]: \"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon\",\"rg -n \\\"prompt|assumption|risk|question|ready|status\\\" .agent/controllers/teamwise-prompt-lens/state.toon\"\n cleanup_requirements[1]{label,description}:\n Avoid overspecified state,Remove transient or redundant fields that do not support repeated planning runs.\n status: todo\n attempts: 0\n - id: \"step-06\"\n title: Validate With Representative Prompt Cases\n purpose: Check that the controller can upgrade rough prompts into clearer briefs without losing user intent.\n notes: A few realistic examples are the fastest way to catch missing sections or overcomplicated output rules.\n inputs[2]: Updated controller artifacts,Representative sloppy prompts about codebase improvements\n outputs[2]: Validation notes,\"Any final wording adjustments to goal, standards, plan, or state\"\n dependencies[1]: \"step-05\"\n verification[2]:\n - label: Run artifact review against sample prompts\n commands[4]: \"sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/goal.md\",\"sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/standards.md\",\"sed -n '1,320p' .agent/controllers/teamwise-prompt-lens/plan.toon\",\"sed -n '1,260p' .agent/controllers/teamwise-prompt-lens/state.toon\"\n - label: Final placeholder sweep\n commands[1]: \"rg -n \\\"TODO|Describe the goal for this controller|TBD\\\" .agent/controllers/teamwise-prompt-lens | rg -v -F \\\"commands[\\\"\"\n cleanup_requirements[1]{label,description}:\n Remove ad hoc examples,\"Do not leave validation-only sample prompts in production controller files unless intentionally documented.\"\n status: todo\n attempts: 0","version: 1\nphase: executing\nstop_reason: null\ngoal_status: \"in-progress\"\ngoal_revision: 1\ncurrent_step_id: null\niteration: 3\nreplan_required: true\ncompleted_steps[1]: \"step-01\"\nblocked_steps[0]:\nlast_verification:\n passed: true\n summary: Audited the teamwise-prompt-lens controller artifacts and confirmed the current file inventory.\n commands[5]: \"ls -la .agent/controllers/teamwise-prompt-lens\",\"sed -n '1,200p' .agent/controllers/teamwise-prompt-lens/goal.md\",\"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/standards.md\",\"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/plan.toon\",\"sed -n '1,240p' .agent/controllers/teamwise-prompt-lens/state.toon\"\n output[3]: \"Confirmed inventory: goal.md, standards.md, plan.toon, and state.toon are present under .agent/controllers/teamwise-prompt-lens.\",\"goal.md and standards.md already express the requested team-oriented prompt-improvement intent instead of generic placeholder copy.\",\"Stale content to replace is concentrated in plan metadata that still treated the markdown as if it were unfinished and in the state file, which had not yet recorded audit findings.\"\nlast_cleanup_summary:\n passed: true\n summary: Normalized the audit metadata so later steps no longer assume placeholder markdown or fail placeholder sweeps on intentional standards text.\n commands[2]: \"rg -n \\\"placeholder-heavy|Replace the placeholder Markdown\\\" .agent/controllers/teamwise-prompt-lens/plan.toon\",\"rg -n \\\"TODO|Describe the goal for this controller|TBD\\\" .agent/controllers/teamwise-prompt-lens | rg -v -F \\\"commands[\\\"\"\n output[2]: \"Updated step-01 and step-03 wording to reflect that the markdown already exists and now needs refinement rather than replacement.\",\"Tightened placeholder sweeps so they target actual template markers instead of the literal word 'placeholder' used inside standards guidance.\"\nlast_full_test_summary:\n passed: true\n summary: Repository tests stayed green after the controller artifact audit.\n commands[1]: \"cargo test -q\"\n output[1]: \"running 71 tests\\n.......................................................................\\ntest result: ok. 71 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.02s\"\nhistory[1]{timestamp,kind,detail}:\n \"1775297724\",\"step-complete\",\"Completed step-01\"\nnotes[3]: \"Audit complete: all four controller artifacts exist under .agent/controllers/teamwise-prompt-lens.\",\"goal.md and standards.md are already concrete and aligned with the requested cross-functional prompt lens.\",\"Preserve the current TOON structure of ordered steps, explicit dependencies, per-step verification and cleanup requirements, and compact controller state summaries.\"\nplanning_session:\n pending_question: null\n transcript[2]{role,content}:\n user,\"Setup good prompt helper prompts. When i enter a prompt in the controller goal planner i want intput from a full suftware team, architect, qa, senior engineer, product owner, the whole gang. I want my sloppy prompts become well thought out improvements to the codebase.\"\n assistant,Planning completed\nstarted_at: \"1775297212\"\nlast_usage_refresh_at: \"1775297724\"\nlast_usage_input_tokens: null\nlast_usage_output_tokens: null" -last_cleanup_summary: null -last_full_test_summary: null -history[0]: -notes[1]: "Verification failed for step-01." -planning_session: - pending_question: null - transcript[2]{role,content}: - user,"Setup good prompt helper prompts. When i enter a prompt in the controller goal planner i want intput from a full suftware team, architect, qa, senior engineer, product owner, the whole gang. I want my sloppy prompts become well thought out improvements to the codebase." - assistant,Planning completed -started_at: "1775297212" -last_usage_refresh_at: "1775299104" -last_usage_input_tokens: null -last_usage_output_tokens: null -last_usage_primary_window: null -last_usage_secondary_window: null \ No newline at end of file diff --git a/src/app/mod.rs b/src/app/mod.rs index c763b02..df2d14b 100644 --- a/src/app/mod.rs +++ b/src/app/mod.rs @@ -28,6 +28,12 @@ use crate::model::{ use crate::ui::{self, scroll::VerticalScrollState, SessionRenderRow, SessionView, SidebarView}; pub(crate) const USAGE_REFRESH_INTERVAL: Duration = Duration::from_secs(120); +pub(crate) const CREATE_MODELS: [&str; 4] = [ + "gpt-5.4", + "gpt-5.4-mini", + "gpt-5.3-codex", + "gpt-5.3-codex-spark", +]; #[derive(Debug, Clone)] #[allow(clippy::large_enum_variant)] @@ -86,6 +92,9 @@ pub struct App { pub picker_items: Vec, pub picker_selected: usize, pub create_input: String, + pub create_model_index: usize, + pub create_fast_mode: bool, + pub create_allow_branching: bool, pub create_error: Option, pub default_task_path: PathBuf, pub(crate) frame_tick: u64, @@ -100,6 +109,9 @@ impl App { picker_items: Vec::new(), picker_selected: 0, create_input: String::new(), + create_model_index: 0, + create_fast_mode: false, + create_allow_branching: false, create_error: None, default_task_path: default_task_path.clone(), frame_tick: 0, @@ -144,7 +156,11 @@ impl App { let workspace = self.workspace.as_ref()?; Some(StatusSnapshot { controller_id: workspace.task_config.controller_id(), - branch: workspace.task_config.branch.clone(), + branch: if workspace.state.allow_branching { + workspace.task_config.branch.clone() + } else { + "current".to_string() + }, started_at: workspace.state.started_at.clone(), phase: workspace.state.phase.clone(), iteration: workspace.state.iteration, @@ -154,6 +170,25 @@ impl App { }) } + pub(crate) fn create_model(&self) -> &'static str { + CREATE_MODELS + .get(self.create_model_index) + .copied() + .unwrap_or(CREATE_MODELS[0]) + } + + pub(crate) fn cycle_create_model(&mut self) { + self.create_model_index = (self.create_model_index + 1) % CREATE_MODELS.len(); + } + + pub(crate) fn reset_create_form(&mut self) { + self.create_input.clear(); + self.create_model_index = 0; + self.create_fast_mode = false; + self.create_allow_branching = false; + self.create_error = None; + } + pub(crate) fn workspace(&self) -> Option<&WorkspaceRuntime> { self.workspace.as_ref() } diff --git a/src/app/picker.rs b/src/app/picker.rs index 8a9f198..50300c2 100644 --- a/src/app/picker.rs +++ b/src/app/picker.rs @@ -24,13 +24,13 @@ impl App { } KeyCode::Char('n') => { self.screen = Screen::CreateController; - self.create_error = None; + self.reset_create_form(); Ok(false) } KeyCode::Enter => { if self.picker_selected == self.picker_items.len() { self.screen = Screen::CreateController; - self.create_error = None; + self.reset_create_form(); return Ok(false); } @@ -55,6 +55,21 @@ impl App { self.create_error = None; Ok(false) } + KeyCode::F(2) => { + self.cycle_create_model(); + self.create_error = None; + Ok(false) + } + KeyCode::F(3) => { + self.create_fast_mode = !self.create_fast_mode; + self.create_error = None; + Ok(false) + } + KeyCode::F(4) => { + self.create_allow_branching = !self.create_allow_branching; + self.create_error = None; + Ok(false) + } KeyCode::Backspace => { self.create_input.pop(); self.create_error = None; @@ -68,11 +83,15 @@ impl App { return Ok(false); } - match self.create_workspace_from_goal(goal.clone()) { + let model = self.create_model().to_string(); + let fast_mode = self.create_fast_mode; + let allow_branching = self.create_allow_branching; + + match self.create_workspace_from_goal(goal.clone(), model, fast_mode, allow_branching) + { Ok(()) => { self.submit_workspace_input(goal)?; - self.create_input.clear(); - self.create_error = None; + self.reset_create_form(); } Err(error) => { self.create_error = Some(error.to_string()); diff --git a/src/app/runtime/events.rs b/src/app/runtime/events.rs index 09fb097..e9c42a8 100644 --- a/src/app/runtime/events.rs +++ b/src/app/runtime/events.rs @@ -193,6 +193,9 @@ mod tests { picker_items: Vec::new(), picker_selected: 0, create_input: String::new(), + create_model_index: 0, + create_fast_mode: false, + create_allow_branching: false, create_error: None, default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH), frame_tick: 0, diff --git a/src/app/runtime/mod.rs b/src/app/runtime/mod.rs index bba8b50..ea50f48 100644 --- a/src/app/runtime/mod.rs +++ b/src/app/runtime/mod.rs @@ -15,8 +15,20 @@ impl App { workspace::open_workspace_from_task_file(self, task_path) } - pub(super) fn create_workspace_from_goal(&mut self, goal: String) -> Result<()> { - workspace::create_workspace_from_goal(self, goal) + pub(super) fn create_workspace_from_goal( + &mut self, + goal: String, + model: String, + fast_mode: bool, + allow_branching: bool, + ) -> Result<()> { + workspace::create_workspace_from_goal_with_options( + self, + goal, + model, + fast_mode, + allow_branching, + ) } pub(super) fn open_workspace( @@ -39,6 +51,10 @@ impl App { usage::maybe_refresh_usage(self) } + pub(super) fn refresh_usage_now(&mut self) -> Result<()> { + usage::refresh_usage_now(self) + } + pub(super) fn push_local_entry( &mut self, source: SessionSource, diff --git a/src/app/runtime/usage.rs b/src/app/runtime/usage.rs index b115f66..22a70a4 100644 --- a/src/app/runtime/usage.rs +++ b/src/app/runtime/usage.rs @@ -14,14 +14,18 @@ pub(super) fn maybe_refresh_usage(app: &mut App) -> Result<()> { return Ok(()); } + refresh_usage_now(app) +} + +pub(super) fn refresh_usage_now(app: &mut App) -> Result<()> { + let Some(workspace) = app.workspace.as_mut() else { + return Ok(()); + }; + let snapshot = crate::process::refresh_usage_snapshot(&workspace.state); workspace.last_usage_refresh = Instant::now(); workspace.usage_snapshot = snapshot.clone(); - workspace.state.last_usage_refresh_at = snapshot.refreshed_at.clone(); - workspace.state.last_usage_input_tokens = snapshot.input_tokens; - workspace.state.last_usage_output_tokens = snapshot.output_tokens; - workspace.state.last_usage_primary_window = snapshot.primary.clone(); - workspace.state.last_usage_secondary_window = snapshot.secondary.clone(); + crate::process::persist_usage_snapshot(&mut workspace.state, &snapshot); toon::write_state(&workspace.task_config.state_file, &workspace.state)?; Ok(()) } @@ -74,6 +78,9 @@ mod tests { picker_items: Vec::new(), picker_selected: 0, create_input: String::new(), + create_model_index: 0, + create_fast_mode: false, + create_allow_branching: false, create_error: None, default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH), frame_tick: 0, diff --git a/src/app/runtime/workspace.rs b/src/app/runtime/workspace.rs index 78ad68f..8670283 100644 --- a/src/app/runtime/workspace.rs +++ b/src/app/runtime/workspace.rs @@ -19,13 +19,24 @@ pub(super) fn open_workspace_from_task_file(app: &mut App, task_path: PathBuf) - open_workspace(app, config, Some(task_path)) } -pub(super) fn create_workspace_from_goal(app: &mut App, goal: String) -> Result<()> { +pub(super) fn create_workspace_from_goal_with_options( + app: &mut App, + goal: String, + model: String, + fast_mode: bool, + allow_branching: bool, +) -> Result<()> { let suggested_id = crate::process::generate_controller_id(&repo::repo_root(), &goal).map_err(|error| { anyhow!("Failed to generate controller id with GPT-5.4 mini: {error:#}") })?; let controller_id = toon::make_unique_controller_id(&suggested_id); let config = toon::create_controller(&app.default_task_path, &controller_id)?; + let mut state = toon::read_state(&config.state_file)?; + state.run_model = model; + state.fast_mode = fast_mode; + state.allow_branching = allow_branching; + toon::write_state(&config.state_file, &state)?; open_workspace(app, config, Some(app.default_task_path.clone())) } @@ -82,6 +93,7 @@ pub(super) fn open_workspace( session_drag_active: false, }); app.screen = Screen::Workspace; + app.refresh_usage_now()?; refresh_picker(app)?; Ok(()) } @@ -171,6 +183,9 @@ mod tests { picker_items: Vec::new(), picker_selected: 3, create_input: String::new(), + create_model_index: 0, + create_fast_mode: false, + create_allow_branching: false, create_error: None, default_task_path, frame_tick: 0, diff --git a/src/app/tests.rs b/src/app/tests.rs index 282c2b6..dc07033 100644 --- a/src/app/tests.rs +++ b/src/app/tests.rs @@ -35,6 +35,9 @@ fn sample_app_with_control_rx() -> (App, Receiver) { }], picker_selected: 0, create_input: String::new(), + create_model_index: 0, + create_fast_mode: false, + create_allow_branching: false, create_error: None, default_task_path: PathBuf::from(DEFAULT_TASK_CONFIG_PATH), frame_tick: 0, @@ -96,6 +99,24 @@ fn planning_mode_blocks_slash_commands() { assert!(last.body.contains("Slash commands")); } +#[test] +fn create_screen_shortcuts_update_run_options() { + let mut app = sample_app(); + app.workspace = None; + app.screen = Screen::CreateController; + + app.handle_create_key(KeyEvent::from(KeyCode::F(2))) + .expect("cycle model"); + app.handle_create_key(KeyEvent::from(KeyCode::F(3))) + .expect("toggle fast"); + app.handle_create_key(KeyEvent::from(KeyCode::F(4))) + .expect("toggle branching"); + + assert_eq!(app.create_model(), "gpt-5.4-mini"); + assert!(app.create_fast_mode); + assert!(app.create_allow_branching); +} + #[test] fn status_command_reports_current_workspace_progress() { let mut app = sample_app(); diff --git a/src/controller/engine.rs b/src/controller/engine.rs index c3c9209..ff65879 100644 --- a/src/controller/engine.rs +++ b/src/controller/engine.rs @@ -35,6 +35,7 @@ pub fn runtime_loop( } let goal_md = toon::read_markdown(&config.goal_file)?; let standards_md = toon::read_markdown(&config.standards_file)?; + refresh_usage_state(&mut state); emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state); match control_rx.try_recv() { @@ -186,11 +187,12 @@ pub fn runtime_loop( plan.mark_active(&step.id); state.current_step_id = Some(step.id.clone()); state.iteration += 1; + refresh_usage_state(&mut state); toon::write_plan(&config.plan_file, &plan)?; toon::write_state(&config.state_file, &state)?; emit_snapshot(&event_tx, &goal_md, &standards_md, &plan, &state); - let exec = executor::implement(&repo_root, &config, &plan, &step, &event_tx)?; + let exec = executor::implement(&repo_root, &config, &state, &plan, &step, &event_tx)?; if goal_checker::needs_goal_clarification(&exec) { state.phase = ControllerPhase::Planning; state.set_stop_reason(format!( @@ -253,6 +255,11 @@ pub fn runtime_loop( Ok(()) } +fn refresh_usage_state(state: &mut crate::model::ControllerState) { + let snapshot = crate::process::refresh_usage_snapshot(state); + crate::process::persist_usage_snapshot(state, &snapshot); +} + fn emit_snapshot( event_tx: &Sender, goal_md: &str, @@ -291,12 +298,25 @@ fn recover_stale_execution_state( state: &mut crate::model::ControllerState, event_tx: &Sender, ) -> Result { - if state.current_step_id.is_some() { + let current_step_id = state.current_step_id.clone(); + let has_stale_current_step = if let Some(current_step_id) = ¤t_step_id { + !plan.steps.iter().any(|step| { + step.id == *current_step_id + && matches!( + step.status, + StepStatus::Todo | StepStatus::Active | StepStatus::Blocked + ) + }) + } else { + false + }; + + if !has_stale_current_step && state.current_step_id.is_some() { return Ok(false); } let active_steps = plan.active_step_ids(); - if active_steps.is_empty() { + if !has_stale_current_step && active_steps.is_empty() { return Ok(false); } @@ -312,11 +332,28 @@ fn recover_stale_execution_state( state.goal_status = GoalStatus::InProgress; state.clear_stop_reason(); state.replan_required = false; - let reason = format!( - "Recovered stale active step state for {}. Reset {} to todo.", - config.controller_id(), - active_steps.join(", ") - ); + state.current_step_id = None; + + let reason = if has_stale_current_step && !active_steps.is_empty() { + format!( + "Recovered stale execution state for {}. Cleared current_step_id {}. Reset {} to todo.", + config.controller_id(), + current_step_id.unwrap_or_default(), + active_steps.join(", ") + ) + } else if has_stale_current_step { + format!( + "Recovered stale execution state for {}. Cleared current_step_id {}.", + config.controller_id(), + current_step_id.unwrap_or_default() + ) + } else { + format!( + "Recovered stale active step state for {}. Reset {} to todo.", + config.controller_id(), + active_steps.join(", ") + ) + }; state.notes.push(reason.clone()); toon::write_plan(&config.plan_file, plan)?; toon::write_state(&config.state_file, state)?; @@ -389,6 +426,53 @@ mod tests { } } + #[test] + fn recovers_stale_current_step_reference() { + let temp = tempdir().expect("tempdir"); + let mut config = TaskConfig::default_for("stale-current"); + let root = temp.path().join(".agent/controllers/stale-current"); + config.goal_file = root.join("goal.md"); + config.plan_file = root.join("plan.toon"); + config.state_file = root.join("state.toon"); + config.standards_file = root.join("standards.md"); + + let mut plan = Plan { + version: 1, + goal_summary: "goal".to_string(), + steps: vec![PlanStep { + id: "s1".to_string(), + title: "Scope".to_string(), + status: StepStatus::Done, + ..PlanStep::default() + }], + }; + let mut state = ControllerState { + phase: ControllerPhase::Blocked, + goal_status: GoalStatus::Blocked, + current_step_id: Some("s1".to_string()), + ..ControllerState::default() + }; + + toon::ensure_controller_files(&config).expect("ensure files"); + let (event_tx, event_rx) = mpsc::channel(); + + let recovered = recover_stale_execution_state(&config, &mut plan, &mut state, &event_tx) + .expect("recover"); + + assert!(recovered); + assert!(matches!(state.current_step_id, None)); + assert!(matches!(state.phase, ControllerPhase::Executing)); + assert!(matches!(state.goal_status, GoalStatus::InProgress)); + assert!(state.stop_reason.is_none()); + let event = event_rx.recv().expect("notice event"); + match event { + AppEvent::Session(entry) => { + assert!(entry.body.contains("Cleared current_step_id s1")); + } + other => panic!("unexpected event: {other:?}"), + } + } + #[test] fn resumable_step_prefers_current_blocked_or_active_step() { let plan = Plan { diff --git a/src/controller/executor.rs b/src/controller/executor.rs index d9facd4..4c716e0 100644 --- a/src/controller/executor.rs +++ b/src/controller/executor.rs @@ -4,7 +4,7 @@ use anyhow::Result; use serde_json::{json, Value}; use crate::app::AppEvent; -use crate::model::{ExecutionResponse, Plan, PlanStep, SessionSource, TaskConfig}; +use crate::model::{ControllerState, ExecutionResponse, Plan, PlanStep, SessionSource, TaskConfig}; use crate::process; use crate::prompt; use crate::storage::toon; @@ -12,6 +12,7 @@ use crate::storage::toon; pub fn implement( repo_root: &std::path::Path, config: &TaskConfig, + state: &ControllerState, plan: &Plan, step: &PlanStep, event_tx: &Sender, @@ -31,11 +32,19 @@ pub fn implement( "- Keep output terse. Use short summaries and short notes.\n", "- If the requested change is already present, return done.\n", "- If the goal is genuinely ambiguous, set needs_goal_clarification=true.\n\n", + "Branching:\n{branching}\n\n", + "Run mode:\n{run_mode}\n\n", "Return empty arrays for verification_commands, test_commands, or notes when not needed.\n\n", "Goal summary:\n{goal}\n\n", "Standards summary:\n{standards}\n\n", "Execution context:\n{context}\n" ), + branching = branching_instruction(config, state), + run_mode = if state.fast_mode { + "fast mode enabled; favor the narrowest sufficient inspection and verification" + } else { + "normal mode" + }, goal = prompt::compact_markdown(&goal_md, 8, 1200), standards = prompt::compact_markdown(&standards_md, 10, 1200), context = serde_json::to_string_pretty(&context)?, @@ -58,6 +67,7 @@ pub fn implement( repo_root, &prompt, &schema, + state.run_model(), event_tx, SessionSource::Executor, Some(step.id.clone()), @@ -65,6 +75,17 @@ pub fn implement( Ok(serde_json::from_str(&raw)?) } +fn branching_instruction(config: &TaskConfig, state: &ControllerState) -> String { + if state.allow_branching { + format!( + "branch creation or switching is allowed only when necessary; preferred branch is {}", + config.branch + ) + } else { + "do not create, switch, or rename git branches; stay on the current branch".to_string() + } +} + fn build_execution_context(plan: &Plan, step: &PlanStep) -> Value { let dependency_steps = step .dependencies diff --git a/src/controller/planner.rs b/src/controller/planner.rs index 192aaf9..728eeee 100644 --- a/src/controller/planner.rs +++ b/src/controller/planner.rs @@ -31,12 +31,25 @@ pub fn refine_without_user_input( "Use step_updates only for new or changed steps.\n", "Use remove_step_ids only for steps that should be deleted.\n", "Use pending_step_order only when pending-step order should change; otherwise return an empty array.\n", + "Do not propose branch creation or branch switching unless branching is explicitly allowed below.\n", "Return only the delta object.\n\n", + "Branching:\n{branching}\n\n", + "Run mode:\n{run_mode}\n\n", "Goal summary:\n{goal}\n\n", "Standards summary:\n{standards}\n\n", "Current plan context:\n{plan}\n\n", "Current state:\n{state}\n" ), + branching = if state.allow_branching { + format!("branching allowed if clearly helpful; preferred branch is {}", config.branch) + } else { + "branching disabled; stay on the current branch".to_string() + }, + run_mode = if state.fast_mode { + "fast mode enabled; prefer fewer, broader steps and minimal delta output" + } else { + "normal mode" + }, goal = prompt::compact_markdown(&goal_md, 8, 1200), standards = prompt::compact_markdown(&standards_md, 10, 1200), plan = serde_json::to_string_pretty(&build_replan_context(plan, state))?, @@ -47,6 +60,7 @@ pub fn refine_without_user_input( repo_root, &prompt, &schema, + state.run_model(), event_tx, SessionSource::Planner, Some(config.controller_id()), diff --git a/src/model/controller.rs b/src/model/controller.rs index 451fad9..0dc75de 100644 --- a/src/model/controller.rs +++ b/src/model/controller.rs @@ -1,7 +1,11 @@ +use std::collections::BTreeMap; + use serde::{Deserialize, Serialize}; use super::{PlanStep, UsageWindow}; +pub const DEFAULT_RUN_MODEL: &str = "gpt-5.4"; + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Screen { ControllerPicker, @@ -120,6 +124,10 @@ pub struct ControllerState { pub last_full_test_summary: Option, pub history: Vec, pub notes: Vec, + #[serde(default)] + pub planning_rejection_counters: BTreeMap, + #[serde(default)] + pub planning_annotation_counters: BTreeMap, pub planning_session: PlanningSessionMeta, pub started_at: Option, pub last_usage_refresh_at: Option, @@ -127,6 +135,16 @@ pub struct ControllerState { pub last_usage_output_tokens: Option, pub last_usage_primary_window: Option, pub last_usage_secondary_window: Option, + #[serde(default = "default_run_model")] + pub run_model: String, + #[serde(default)] + pub fast_mode: bool, + #[serde(default)] + pub allow_branching: bool, +} + +fn default_run_model() -> String { + DEFAULT_RUN_MODEL.to_string() } impl Default for ControllerState { @@ -147,6 +165,8 @@ impl Default for ControllerState { last_full_test_summary: None, history: Vec::new(), notes: Vec::new(), + planning_rejection_counters: BTreeMap::new(), + planning_annotation_counters: BTreeMap::new(), planning_session: PlanningSessionMeta::default(), started_at: None, last_usage_refresh_at: None, @@ -154,6 +174,9 @@ impl Default for ControllerState { last_usage_output_tokens: None, last_usage_primary_window: None, last_usage_secondary_window: None, + run_model: default_run_model(), + fast_mode: false, + allow_branching: false, } } } @@ -171,6 +194,15 @@ impl ControllerState { self.stop_reason = None; } + pub fn run_model(&self) -> &str { + let model = self.run_model.trim(); + if model.is_empty() { + DEFAULT_RUN_MODEL + } else { + model + } + } + pub fn latest_notice(&self) -> Option { self.stop_reason .clone() @@ -205,6 +237,32 @@ impl ControllerState { }) } + pub fn planning_guardrail_counter_key(stage: &str, failure_type: &str) -> String { + format!("{stage}:{failure_type}") + } + + pub fn increment_planning_rejection_counter( + &mut self, + stage: &str, + failure_type: &str, + ) -> u32 { + let key = Self::planning_guardrail_counter_key(stage, failure_type); + let value = self.planning_rejection_counters.entry(key).or_insert(0); + *value = value.saturating_add(1); + *value + } + + pub fn increment_planning_annotation_counter( + &mut self, + stage: &str, + failure_type: &str, + ) -> u32 { + let key = Self::planning_guardrail_counter_key(stage, failure_type); + let value = self.planning_annotation_counters.entry(key).or_insert(0); + *value = value.saturating_add(1); + *value + } + pub fn phase_notice(&self) -> Option { match self.phase { ControllerPhase::Blocked => Some( diff --git a/src/model/mod.rs b/src/model/mod.rs index 2a6481c..cf6c7ed 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -9,6 +9,7 @@ mod usage; pub use self::controller::{ CleanupSummary, CommandSummary, ControllerPhase, ControllerState, GoalStatus, HistoryEvent, PlanningSessionMeta, PlanningTurn, Screen, StepStatus, TestSummary, VerificationSummary, + DEFAULT_RUN_MODEL, }; #[allow(unused_imports)] pub use self::plan::{ @@ -17,7 +18,16 @@ pub use self::plan::{ #[allow(unused_imports)] pub use self::response::{ControllerSummary, ExecutionResponse, PlannerResponse}; #[allow(unused_imports)] +pub use self::response::{ + GOAL_PLANNING_CONTRACT_VERSION, LegacyOutputProjection, PLANNING_QUALITY_GATE_VERSION, + PlanningConflictRule, PlanningConflictStrategy, PlanningContract, PlanningQualityDecisionCode, + PlanningQualityGate, PlanningPersona, PlanningPersonaEvidence, PlanningPersonaPass, + LEGACY_GOAL_PLANNING_CONTRACT_VERSION, +}; +#[allow(unused_imports)] pub use self::schema::{ + planner_contract_schema, planning_conflict_rule_schema, planning_conflict_strategy_schema, + planning_contract_schema, planning_persona_pass_schema, planning_persona_schema, cleanup_rule_schema, plan_delta_schema, plan_schema, plan_step_schema, verification_check_schema, }; diff --git a/src/model/response.rs b/src/model/response.rs index dc51c44..6a7bf8e 100644 --- a/src/model/response.rs +++ b/src/model/response.rs @@ -2,6 +2,135 @@ use serde::{Deserialize, Serialize}; use super::{ControllerPhase, Plan}; +pub const GOAL_PLANNING_CONTRACT_VERSION: u32 = 1; +pub const LEGACY_GOAL_PLANNING_CONTRACT_VERSION: u32 = 0; +pub const PLANNING_QUALITY_GATE_VERSION: u32 = 1; + +fn default_planning_contract_version() -> u32 { + LEGACY_GOAL_PLANNING_CONTRACT_VERSION +} + +fn default_planning_quality_gate() -> PlanningQualityGate { + PlanningQualityGate::default() +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub enum PlanningPersona { + ProductOwner, + SeniorEngineer, + SeniorMaintainer, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub enum PlanningConflictStrategy { + LatestStageWins, + AppendUnique, + Replace, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PlanningConflictRule { + pub field: String, + pub strategy: PlanningConflictStrategy, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PlanningPersonaEvidence { + pub facts: Vec, + pub assumptions: Vec, + pub questions: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PlanningPersonaPass { + pub persona: PlanningPersona, + pub intent: String, + pub constraints: Vec, + pub risks: Vec, + pub acceptance_criteria: Vec, + pub evidence: PlanningPersonaEvidence, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PlanningContract { + pub contract_version: u32, + pub ordered_personas: Vec, + pub conflict_rules: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct LegacyOutputProjection { + pub goal_md_stage: PlanningPersona, + pub standards_md_stage: PlanningPersona, + pub plan_stage: PlanningPersona, +} + +impl Default for PlanningContract { + fn default() -> Self { + Self { + contract_version: GOAL_PLANNING_CONTRACT_VERSION, + ordered_personas: vec![ + PlanningPersona::ProductOwner, + PlanningPersona::SeniorEngineer, + PlanningPersona::SeniorMaintainer, + ], + conflict_rules: vec![ + PlanningConflictRule { + field: "goal_md".to_string(), + strategy: PlanningConflictStrategy::LatestStageWins, + }, + PlanningConflictRule { + field: "standards_md".to_string(), + strategy: PlanningConflictStrategy::LatestStageWins, + }, + PlanningConflictRule { + field: "plan".to_string(), + strategy: PlanningConflictStrategy::Replace, + }, + PlanningConflictRule { + field: "constraints".to_string(), + strategy: PlanningConflictStrategy::AppendUnique, + }, + PlanningConflictRule { + field: "risks".to_string(), + strategy: PlanningConflictStrategy::AppendUnique, + }, + ], + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(rename_all = "kebab-case")] +pub enum PlanningQualityDecisionCode { + Accept, + Downgraded, + Blocked, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct PlanningQualityGate { + pub quality_gate_version: u32, + pub decision_code: PlanningQualityDecisionCode, + #[serde(default)] + pub rationale_codes: Vec, + #[serde(default)] + pub rationale: Vec, +} + +impl Default for PlanningQualityGate { + fn default() -> Self { + Self { + quality_gate_version: PLANNING_QUALITY_GATE_VERSION, + decision_code: PlanningQualityDecisionCode::Accept, + rationale_codes: Vec::new(), + rationale: Vec::new(), + } + } +} + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct PlannerResponse { pub kind: String, @@ -9,6 +138,16 @@ pub struct PlannerResponse { pub goal_md: Option, pub standards_md: Option, pub plan: Option, + #[serde(default = "default_planning_contract_version")] + pub planning_contract_version: u32, + #[serde(default)] + pub contract: Option, + #[serde(default)] + pub persona_passes: Vec, + #[serde(default)] + pub single_pass_projection: Option, + #[serde(default = "default_planning_quality_gate")] + pub quality_gate: PlanningQualityGate, } #[derive(Debug, Clone, Serialize, Deserialize, Default)] diff --git a/src/model/schema.rs b/src/model/schema.rs index 527fc39..722a501 100644 --- a/src/model/schema.rs +++ b/src/model/schema.rs @@ -15,6 +15,171 @@ pub fn verification_check_schema() -> Value { }) } +pub fn planning_persona_schema() -> Value { + json!({ + "type": "string", + "enum": ["product-owner", "senior-engineer", "senior-maintainer"] + }) +} + +pub fn planning_quality_decision_schema() -> Value { + json!({ + "type": "string", + "enum": ["accept", "downgraded", "blocked"] + }) +} + +pub fn planning_quality_gate_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": ["quality_gate_version", "decision_code", "rationale_codes", "rationale"], + "properties": { + "quality_gate_version": { "type": "integer" }, + "decision_code": planning_quality_decision_schema(), + "rationale_codes": { "type": "array", "items": { "type": "string" } }, + "rationale": { "type": "array", "items": { "type": "string" } } + } + }) +} + +pub fn planning_conflict_strategy_schema() -> Value { + json!({ + "type": "string", + "enum": ["latest-stage-wins", "append-unique", "replace"] + }) +} + +pub fn planning_conflict_rule_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": ["field", "strategy"], + "properties": { + "field": { "type": "string" }, + "strategy": planning_conflict_strategy_schema() + } + }) +} + +pub fn planning_persona_evidence_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": ["facts", "assumptions", "questions"], + "properties": { + "facts": { "type": "array", "items": { "type": "string" } }, + "assumptions": { "type": "array", "items": { "type": "string" } }, + "questions": { "type": "array", "items": { "type": "string" } } + } + }) +} + +pub fn planning_persona_pass_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": [ + "persona", + "intent", + "constraints", + "risks", + "acceptance_criteria", + "evidence" + ], + "properties": { + "persona": planning_persona_schema(), + "intent": { "type": "string" }, + "constraints": { "type": "array", "items": { "type": "string" } }, + "risks": { "type": "array", "items": { "type": "string" } }, + "acceptance_criteria": { "type": "array", "items": { "type": "string" } }, + "evidence": planning_persona_evidence_schema() + } + }) +} + +pub fn planning_contract_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": ["contract_version", "ordered_personas", "conflict_rules"], + "properties": { + "contract_version": { "type": "integer" }, + "ordered_personas": { + "type": "array", + "items": planning_persona_schema(), + "minItems": 3, + "maxItems": 3 + }, + "conflict_rules": { + "type": "array", + "items": planning_conflict_rule_schema() + } + } + }) +} + +pub fn legacy_output_projection_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": ["goal_md_stage", "standards_md_stage", "plan_stage"], + "properties": { + "goal_md_stage": planning_persona_schema(), + "standards_md_stage": planning_persona_schema(), + "plan_stage": planning_persona_schema() + } + }) +} + +pub fn planner_contract_schema() -> Value { + json!({ + "type": "object", + "additionalProperties": false, + "required": [ + "kind", + "question", + "goal_md", + "standards_md", + "plan", + "planning_contract_version", + "contract", + "persona_passes", + "single_pass_projection" + ], + "properties": { + "kind": { "type": "string", "enum": ["question", "final"] }, + "question": { "type": ["string", "null"] }, + "goal_md": { "type": ["string", "null"] }, + "standards_md": { "type": ["string", "null"] }, + "plan": { + "anyOf": [ + plan_schema(), + { "type": "null" } + ] + }, + "planning_contract_version": { "type": "integer" }, + "contract": planning_contract_schema(), + "persona_passes": { + "type": "array", + "items": planning_persona_pass_schema() + }, + "quality_gate": { + "anyOf": [ + planning_quality_gate_schema(), + { "type": "null" } + ] + }, + "single_pass_projection": { + "anyOf": [ + legacy_output_projection_schema(), + { "type": "null" } + ] + } + } + }) +} + pub fn cleanup_rule_schema() -> Value { json!({ "type": "object", @@ -158,4 +323,18 @@ mod tests { false ); } + + #[test] + fn planner_contract_schema_carries_three_pass_contract_fields() { + let schema = planner_contract_schema(); + assert_eq!( + schema["required"], + json!(["kind","question","goal_md","standards_md","plan","planning_contract_version","contract","persona_passes","single_pass_projection"]) + ); + assert_eq!( + schema["properties"]["contract"]["required"], + json!(["contract_version","ordered_personas","conflict_rules"]) + ); + assert!(schema["properties"]["quality_gate"].is_object()); + } } diff --git a/src/planning/forwarder.rs b/src/planning/forwarder.rs index fc1c1fc..76f115a 100644 --- a/src/planning/forwarder.rs +++ b/src/planning/forwarder.rs @@ -1,10 +1,99 @@ -use serde_json::json; +use std::sync::mpsc::Sender; -use crate::model::{self, ControllerState, PlannerResponse, TaskConfig}; +use anyhow::Result; +use serde_json::{json, to_string_pretty}; + +use crate::app::AppEvent; +use crate::model::{ + self, ControllerState, LegacyOutputProjection, PlannerResponse, PlanningConflictStrategy, + PlanningContract, PlanningPersona, PlanningPersonaPass, Plan, SessionEntry, SessionSource, + SessionStream, TaskConfig, +}; use crate::prompt; +use crate::process; +use crate::repo; + +const MAX_TRANSCRIPT_ITEMS: usize = 6; +const MAX_TRANSCRIPT_CHARS: usize = 240; +const PLANNING_PERSONA_TEMPLATE_VERSION: u32 = 1; +const PLANNING_ROLLOUT_ALERT_THRESHOLD: u32 = 3; +const EXPECTED_PERSONA_CHAIN: [&str; 3] = [ + "product-owner", + "senior-engineer", + "senior-maintainer", +]; +const EXPECTED_STAGE_TAGS: [&str; 3] = ["stage-1", "stage-2", "stage-3"]; +const OWNERSHIP_BOUNDARY_MARKERS: [&str; 6] = [ + "ownership", + "owner", + "module boundary", + "interface boundary", + "contract", + "separation", +]; +const ITERATION_CONTEXT_MARKERS: [&str; 6] = [ + "iteration", + "cleanup", + "roll-forward", + "next iteration", + "replan", + "long-term", +]; +const RISKY_SHORTCUT_MARKERS: [&str; 9] = [ + "quick fix", + "workaround", + "hack", + "hardcode", + "band-aid", + "temporary", + "bypass", + "shortcut", + "ad hoc", +]; +const MAINTAINABILITY_MARKERS: [&str; 17] = [ + "maintain", + "maintainability", + "maintainable", + "modularity", + "modular", + "ownership", + "module boundary", + "interface boundary", + "coupling", + "rollback", + "refactor", + "cleanup", + "migration", + "debt", + "evolv", + "separation", + "long-term", +]; +const CORRECTNESS_ONLY_MARKERS: [&str; 10] = [ + "correctness", + "pass", + "passes", + "passing", + "works", + "working", + "regression", + "stable", + "all tests", + "no break", +]; + +#[derive(Debug, Clone)] +struct StageWorkingSet { + goal_md: String, + standards_md: String, + plan: Plan, + constraints: Vec, + risks: Vec, + plan_projection: LegacyOutputProjection, +} pub fn planning_schema() -> serde_json::Value { - json!({ + let legacy = json!({ "type": "object", "additionalProperties": false, "required": ["kind", "question", "goal_md", "standards_md", "plan"], @@ -20,67 +109,1480 @@ pub fn planning_schema() -> serde_json::Value { ] } } - }) + }); + + json!({ "anyOf": [legacy, model::planner_contract_schema()] }) } pub fn build_planning_prompt( - _config: &TaskConfig, + config: &TaskConfig, goal_md: &str, standards_md: &str, state: &ControllerState, latest_user_input: &str, ) -> String { - let transcript = prompt::compact_turns(&state.planning_session.transcript, 6, 240); + build_persona_planning_prompt( + config, + state, + goal_md, + standards_md, + latest_user_input, + &Plan::default(), + &PlanningContract::default(), + &PlanningPersona::ProductOwner, + true, + ) +} - format!( - concat!( - "You are embedded Codex planning mode for a Rust autonomous controller.\n", - "You are only handling the planning phase.\n\n", - "Rules:\n", - "- Ask at most one follow-up question if the goal is still ambiguous.\n", - "- If you have enough information, return kind=final immediately.\n", +pub fn run_planning_pipeline( + repo_root: &std::path::Path, + config: &TaskConfig, + state: &mut ControllerState, + initial_goal_md: &str, + initial_standards_md: &str, + initial_plan: &Plan, + latest_user_input: &str, + event_tx: &Sender, +) -> Result { + let contract = PlanningContract::default(); + let mut working_set = StageWorkingSet { + goal_md: initial_goal_md.to_string(), + standards_md: initial_standards_md.to_string(), + plan: initial_plan.clone(), + constraints: Vec::new(), + risks: Vec::new(), + plan_projection: LegacyOutputProjection { + goal_md_stage: PlanningPersona::ProductOwner, + standards_md_stage: PlanningPersona::ProductOwner, + plan_stage: PlanningPersona::ProductOwner, + }, + }; + if !contract_has_expected_persona_chain(&contract) { + return Ok(planning_pipeline_question_response( + &contract, + &working_set, + "chain-config", + &[], + "[pipeline] Stage chain configuration is invalid for planner telemetry." + .to_string(), + )); + } + let mut persona_passes = Vec::new(); + + for (stage_index, persona) in contract.ordered_personas.iter().enumerate() { + let allow_question = stage_index == 0; + let prompt = build_persona_planning_prompt( + config, + state, + &working_set.goal_md, + &working_set.standards_md, + latest_user_input, + &working_set.plan, + &contract, + persona, + allow_question, + ); + + let raw = process::run_codex_with_schema( + repo_root, + &prompt, + &planning_schema(), + state.run_model(), + event_tx, + crate::model::SessionSource::Planner, + Some(config.controller_id()), + )?; + let response = parse_planning_response(&raw)?; + let stage_pass = extract_persona_pass(&response, persona).unwrap_or_else(|| { + fallback_persona_pass(persona) + }); + let stage_label = EXPECTED_STAGE_TAGS[stage_index]; + emit_persona_stage_observation( + event_tx, + stage_label, + persona, + &stage_pass, + &response, + &contract, + ); + emit_stage_transition_observation( + event_tx, + stage_label, + persona, + &stage_pass, + &response, + &contract, + ); + + if response.planning_contract_version != contract.contract_version { + record_rollout_counter( + event_tx, + config, + state, + "rejected", + stage_label, + "unsupported-planning-contract-version", + ); + let mut question = format!( + "[pipeline] Rejected stage payload: {stage_label} did not return contract version {}.", + contract.contract_version + ); + if state.fast_mode && !question.starts_with("[pipeline]") { + question = format!("[pipeline] {question}"); + } + let mut blocked_passes = persona_passes.clone(); + blocked_passes.push(stage_pass.clone()); + return Ok(planning_pipeline_question_response( + &contract, + &working_set, + "unsupported-planning-contract-version", + &blocked_passes, + question, + )); + } + + if response.kind == "question" && !allow_question { + record_rollout_counter( + event_tx, + config, + state, + "rejected", + stage_label, + "downstream-question", + ); + return Ok(planning_pipeline_question_response( + &contract, + &working_set, + "downstream-question", + &persona_passes, + format!( + "[pipeline] {stage_label} cannot request clarification before prior stages complete." + ), + )); + } + + merge_stage_pass( + &contract, + &mut working_set, + persona, + &response, + &stage_pass, + )?; + persona_passes.push(stage_pass); + + if response.kind == "question" && allow_question { + let persona_passes = canonicalize_persona_passes(&contract, persona_passes); + let mut question = response + .question + .unwrap_or_else(|| "The planning stage requires clarification.".to_string()); + if state.fast_mode && !question.starts_with("[pipeline]") { + question = format!("[pipeline] {question}"); + } + return Ok(PlannerResponse { + kind: "question".to_string(), + question: Some(question), + goal_md: Some(working_set.goal_md), + standards_md: Some(working_set.standards_md), + plan: Some(working_set.plan), + planning_contract_version: contract.contract_version, + contract: Some(contract), + persona_passes, + single_pass_projection: Some(working_set.plan_projection), + quality_gate: model::PlanningQualityGate::default(), + }); + } + } + + let persona_passes = canonicalize_persona_passes(&contract, persona_passes); + let mut quality_gate = evaluate_quality_gate(state, &working_set, &persona_passes); + if quality_gate.decision_code != model::PlanningQualityDecisionCode::Accept { + for code in quality_gate.rationale_codes.iter() { + if matches!(quality_gate.decision_code, model::PlanningQualityDecisionCode::Blocked) { + record_rollout_counter( + event_tx, + config, + state, + "rejected", + EXPECTED_STAGE_TAGS[2], + code, + ); + } else { + record_rollout_counter( + event_tx, + config, + state, + "annotated", + EXPECTED_STAGE_TAGS[2], + code, + ); + } + } + } + + if quality_gate.decision_code == model::PlanningQualityDecisionCode::Blocked { + if quality_gate.rationale.is_empty() { + quality_gate.rationale.push( + "Quality gate blocked execution because maintainability risks remain unresolved." + .to_string(), + ); + } + let mut question = format!( + "Blocked by quality gate ({}): {}", + quality_gate + .rationale_codes + .first() + .cloned() + .unwrap_or_else(|| "quality-maintenance-risk".to_string()), + quality_gate.rationale.join(" ") + ); + if state.fast_mode && !question.starts_with("[pipeline]") { + question = format!("[pipeline] {question}"); + } + return Ok(PlannerResponse { + kind: "question".to_string(), + question: Some(question), + goal_md: Some(working_set.goal_md), + standards_md: Some(working_set.standards_md), + plan: Some(working_set.plan), + planning_contract_version: contract.contract_version, + contract: Some(contract), + persona_passes, + single_pass_projection: Some(working_set.plan_projection), + quality_gate, + }); + } + + let quality_annotations = quality_gate_annotations(&quality_gate); + if !quality_annotations.is_empty() { + let mut goal_prefix = String::new(); + for annotation in quality_annotations { + goal_prefix.push_str(&format!("\n- {annotation}")); + } + working_set.standards_md = format!( + "{}\n\n## Quality Gate Annotations\n{}\n", + working_set.standards_md, goal_prefix + ); + } + + Ok(PlannerResponse { + kind: "final".to_string(), + question: None, + goal_md: Some(working_set.goal_md), + standards_md: Some(working_set.standards_md), + plan: Some(working_set.plan), + planning_contract_version: contract.contract_version, + contract: Some(contract), + persona_passes, + single_pass_projection: Some(working_set.plan_projection), + quality_gate, + }) +} + +fn planning_pipeline_question_response( + contract: &PlanningContract, + working_set: &StageWorkingSet, + failure_type: &str, + persona_passes: &[PlanningPersonaPass], + mut question: String, +) -> PlannerResponse { + if !question.starts_with("[pipeline") { + question = format!("[pipeline:{failure_type}] {question}"); + } + PlannerResponse { + kind: "question".to_string(), + question: Some(question), + goal_md: Some(working_set.goal_md.clone()), + standards_md: Some(working_set.standards_md.clone()), + plan: Some(working_set.plan.clone()), + planning_contract_version: contract.contract_version, + contract: Some(contract.clone()), + persona_passes: canonicalize_persona_passes(contract, persona_passes.to_vec()), + single_pass_projection: Some(working_set.plan_projection.clone()), + quality_gate: model::PlanningQualityGate::default(), + } +} + +fn contract_has_expected_persona_chain(contract: &PlanningContract) -> bool { + contract.ordered_personas.len() == EXPECTED_PERSONA_CHAIN.len() + && contract + .ordered_personas + .iter() + .map(persona_label) + .eq(EXPECTED_PERSONA_CHAIN.iter().copied()) +} + +fn persona_label(persona: &PlanningPersona) -> &'static str { + match persona { + PlanningPersona::ProductOwner => "product-owner", + PlanningPersona::SeniorEngineer => "senior-engineer", + PlanningPersona::SeniorMaintainer => "senior-maintainer", + } +} + +fn record_rollout_counter( + event_tx: &Sender, + config: &TaskConfig, + state: &mut ControllerState, + category: &str, + stage_label: &str, + reason: &str, +) { + let count = if category == "annotated" { + state.increment_planning_annotation_counter(stage_label, reason) + } else { + state.increment_planning_rejection_counter(stage_label, reason) + }; + + if count == PLANNING_ROLLOUT_ALERT_THRESHOLD { + let _ = event_tx.send(AppEvent::Session(SessionEntry { + source: SessionSource::Warning, + stream: SessionStream::Status, + title: "Planner rollout alert".to_string(), + tag: Some(config.controller_id()), + body: format!( + "Rollout threshold breached for {category} counter {stage_label}:{reason} at {count}." + ), + run_id: repo::next_run_id(), + })); + } +} + +pub fn build_persona_planning_prompt( + config: &TaskConfig, + state: &ControllerState, + goal_md: &str, + standards_md: &str, + latest_user_input: &str, + plan: &Plan, + contract: &PlanningContract, + persona: &PlanningPersona, + _allow_question: bool, +) -> String { + let transcript = prompt::compact_turns( + &state.planning_session.transcript, + MAX_TRANSCRIPT_ITEMS, + MAX_TRANSCRIPT_CHARS, + ); + let contract_json = to_string_pretty(contract).unwrap_or_else(|_| "{}".to_string()); + let plan = to_string_pretty(plan).unwrap_or_else(|_| "{}".to_string()); + let (persona_label, persona_focus, question_rule) = persona_instructions(persona); + + format!( + concat!( + "You are embedded Codex planning mode for a Rust autonomous controller.\n", + "This is the {persona_label} stage of a deterministic three-stage chain.\n", + "Stages must always run in order: product-owner -> senior-engineer -> senior-maintainer.\n", + "Do not ask the user any questions unless explicitly allowed.\n", + "Chain template version: planning-persona-chain-v{template_version}\n", + "Merge composition:\n{merge_rules}\n\n", + "Always return only one JSON object matching the contract.\n\n", + "Rules:\n", + "- Keep the output minimal and execution-safe.\n", + "- Do not invent repository details.\n", "- Always include all response keys.\n", - "- Use null for any field that does not apply in the current response.\n", - "- The final plan must be decision-complete for autonomous execution.\n", - "- The plan should be maintainable and production-quality.\n", - "- Prefer 3 to 6 steps unless the goal truly needs more.\n", - "- Keep each step.note to one short sentence.\n\n", - "Current goal summary:\n{goal_md}\n\n", - "Current standards summary:\n{standards_md}\n\n", + "- Use null for any field that does not apply in this response.\n", + "- Output goal_md, standards_md, and plan should be complete enough for autonomous execution.\n", + "- Return plan steps with one-sentence notes and stable field order.\n", + "- Prefer 3-6 steps unless the goal truly needs more.\n", + "- Keep each plan step.note to one short sentence.\n", + "- Ask at most one follow-up question only when explicitly allowed.\n", + "- Stage focus: {persona_focus}\n", + "- Avoid correctness-only solutions that create high maintenance cost.\n", + "- If maintenance cost is high, keep constraints and risks explicit.\n", + "- If this is not the first stage, do not return kind=question.\n\n", + "Contract:\n{contract}\n\n", + "Persona stage:\n- name: {persona_label}\n\n", + "Conflict instructions:\n", + "- Resolve field conflicts by ordered contract rules; latest stage wins unless append-unique.\n\n", + "Question policy: {question_rule}\n\n", + "Branching:\n{branching}\n\n", + "Run mode:\n{run_mode}\n\n", + "Iteration context:\n{iteration_context}\n\n", + "Current goal summary:\n{goal}\n\n", + "Current standards summary:\n{standards}\n\n", + "Current plan:\n{plan}\n\n", "Recent transcript:\n{transcript}\n\n", "Latest user input:\n{latest}\n\n", "When returning kind=final, include:\n", "- goal_md: rewritten goal markdown\n", "- standards_md: rewritten standards markdown\n", "- plan: structured machine-readable plan object with ordered steps, concise step notes, verification, cleanup requirements, and statuses.\n", - "- Each step.notes field should explain the reason for the step or the current constraint in one short sentence.\n" + "- persona_passes must include this persona intent, constraints, risks, acceptance_criteria, and evidence.\n" ), - goal_md = prompt::compact_markdown(goal_md, 10, 1400), - standards_md = prompt::compact_markdown(standards_md, 10, 1200), + persona_label = persona_label, + persona_focus = persona_focus, + question_rule = question_rule, + template_version = PLANNING_PERSONA_TEMPLATE_VERSION, + merge_rules = build_contract_merge_rules(contract), + contract = contract_json, + plan = plan, + branching = if state.allow_branching { + format!( + "branching is allowed if clearly helpful; preferred branch is {}", + config.branch + ) + } else { + "branching disabled unless the user explicitly asks for it".to_string() + }, + run_mode = if state.fast_mode { + "fast mode enabled; prefer fewer, broader steps".to_string() + } else { + "normal mode".to_string() + }, + iteration_context = serde_json::to_string_pretty(&json!({ + "iteration": state.iteration, + "goal_revision": state.goal_revision, + "replan_required": state.replan_required, + "notes_count": state.notes.len(), + })) + .unwrap_or_else(|_| "{}".to_string()), + goal = prompt::compact_markdown(goal_md, 10, 1400), + standards = prompt::compact_markdown(standards_md, 10, 1200), transcript = transcript, latest = prompt::truncate_text(latest_user_input, 400), ) } pub fn parse_planning_response(raw: &str) -> anyhow::Result { - Ok(serde_json::from_str(raw)?) + let mut response: PlannerResponse = serde_json::from_str(raw)?; + + if response.planning_contract_version == model::LEGACY_GOAL_PLANNING_CONTRACT_VERSION { + response.contract.get_or_insert_with(model::PlanningContract::default); + if response.single_pass_projection.is_none() && matches!(response.kind.as_str(), "final") { + response.single_pass_projection = Some(model::LegacyOutputProjection { + goal_md_stage: model::PlanningPersona::SeniorMaintainer, + standards_md_stage: model::PlanningPersona::SeniorMaintainer, + plan_stage: model::PlanningPersona::SeniorMaintainer, + }); + } + } + + Ok(response) +} + +fn evaluate_quality_gate( + state: &ControllerState, + working_set: &StageWorkingSet, + persona_passes: &[PlanningPersonaPass], +) -> model::PlanningQualityGate { + let mut rationale_codes = Vec::new(); + + if !has_ownership_boundary_signal(working_set) { + rationale_codes.push("missing-ownership-boundaries".to_string()); + } + + if lacks_vague_acceptance_guardrails(persona_passes) { + rationale_codes.push("vague-acceptance-criteria".to_string()); + } + + if has_risky_shortcut_pattern(&working_set.plan) || has_risky_shortcut_pattern_in_passes(persona_passes) + { + rationale_codes.push("risky-shortcut-pattern".to_string()); + } + + if has_correctness_only_optimization(persona_passes) { + rationale_codes.push("correctness-only-optimization".to_string()); + } + + if has_missing_iteration_review_context(state, personaevidence_to_text(persona_passes)) { + rationale_codes.push("missing-iteration-review".to_string()); + } + + let mut rationale = rationale_codes + .iter() + .map(|code| quality_rationale_text(code)) + .collect::>(); + + let decision_code = match rationale_codes.as_slice() { + r if r.contains(&"risky-shortcut-pattern".to_string()) => model::PlanningQualityDecisionCode::Blocked, + [] => model::PlanningQualityDecisionCode::Accept, + r if r.len() <= 2 && !r.contains(&"missing-ownership-boundaries".to_string()) => { + model::PlanningQualityDecisionCode::Downgraded + } + r if !r.contains(&"vague-acceptance-criteria".to_string()) => { + model::PlanningQualityDecisionCode::Downgraded + } + _ => model::PlanningQualityDecisionCode::Downgraded, + }; + + if matches!(decision_code, model::PlanningQualityDecisionCode::Accept) { + rationale.push("plan passes maintainability and iteration-aware quality gates".to_string()); + } + + model::PlanningQualityGate { + quality_gate_version: crate::model::PLANNING_QUALITY_GATE_VERSION, + decision_code, + rationale_codes, + rationale, + } +} + +fn quality_gate_annotations(gate: &model::PlanningQualityGate) -> Vec { + if matches!(gate.decision_code, model::PlanningQualityDecisionCode::Accept) { + return vec!["Iteration-aware review note: confirm long-term ownership and cleanup path." + .to_string()]; + } + + let mut annotations = Vec::new(); + for code in &gate.rationale_codes { + match code.as_str() { + "missing-ownership-boundaries" => { + annotations.push( + "Record ownership boundaries before implementation and align responsibilities per module." + .to_string(), + ); + } + "vague-acceptance-criteria" => { + annotations.push( + "Replace ambiguous success criteria with measurable architecture and maintenance checks." + .to_string(), + ); + } + "risky-shortcut-pattern" => { + annotations.push( + "Remove shortcut approaches and require explicit design-safe alternatives in plan." + .to_string(), + ); + } + "missing-iteration-review" => { + annotations.push( + "Add iteration-aware follow-up/review criteria for rollback, observability, and cleanup." + .to_string(), + ); + } + "correctness-only-optimization" => { + annotations.push( + "Balance correctness goals with architecture and maintenance acceptance criteria." + .to_string(), + ); + } + _ => {} + } + } + annotations +} + +fn quality_rationale_text(code: &str) -> String { + match code { + "missing-ownership-boundaries" => { + "No explicit ownership boundary constraints were provided across persona passes." + .to_string() + } + "vague-acceptance-criteria" => { + "Acceptance criteria lacked explicit architecture or iterability requirements." + .to_string() + } + "risky-shortcut-pattern" => { + "Potential short-term shortcut language appears in constraints, risks, or plan." + .to_string() + } + "missing-iteration-review" => { + "Iteration-aware maintenance review context was not explicitly preserved." + .to_string() + } + "correctness-only-optimization" => { + "Acceptance criteria emphasizes short-term correctness without explicit maintenance goals." + .to_string() + } + _ => format!("Quality check flagged: {code}"), + } +} + +fn has_ownership_boundary_signal(working_set: &StageWorkingSet) -> bool { + let text = (working_set.constraints.join(" ") + + " " + + &working_set.risks.join(" ") + + " " + + &working_set.goal_md) + .to_lowercase(); + OWNERSHIP_BOUNDARY_MARKERS.iter().any(|marker| text.contains(marker)) +} + +fn lacks_vague_acceptance_guardrails(passes: &[PlanningPersonaPass]) -> bool { + let mut has_any_criteria = false; + let mut has_explicit = false; + let mut has_vague = false; + for pass in passes { + if pass.acceptance_criteria.is_empty() { + continue; + } + has_any_criteria = true; + for criteria in &pass.acceptance_criteria { + let c = criteria.to_lowercase(); + if c.len() < 22 + || c.contains("works") + || c.contains("correctness") + || c.contains("pass") + { + has_vague = true; + } + if contains_maintenance_signal(&c) { + has_explicit = true; + } + } + } + if !has_any_criteria { + return true; + } + has_vague && !has_explicit +} + +fn has_correctness_only_optimization(passes: &[PlanningPersonaPass]) -> bool { + let mut has_criteria = false; + let mut all_short_term = true; + let mut has_maintenance_signal = false; + + for pass in passes { + for criteria in &pass.acceptance_criteria { + let criteria = criteria.to_lowercase(); + if criteria.trim().is_empty() { + continue; + } + has_criteria = true; + if contains_maintenance_signal(&criteria) { + has_maintenance_signal = true; + all_short_term = false; + continue; + } + if !is_short_term_criterion(&criteria) { + all_short_term = false; + } + } + } + + has_criteria && all_short_term && !has_maintenance_signal +} + +fn has_risky_shortcut_pattern(plan: &Plan) -> bool { + let mut text = plan.goal_summary.to_lowercase(); + for step in &plan.steps { + if !step.title.is_empty() { + text.push(' '); + text.push_str(&step.title.to_lowercase()); + } + if !step.purpose.is_empty() { + text.push(' '); + text.push_str(&step.purpose.to_lowercase()); + } + if !step.notes.is_empty() { + text.push(' '); + text.push_str(&step.notes.to_lowercase()); + } + } + RISKY_SHORTCUT_MARKERS.iter().any(|marker| text.contains(marker)) +} + +fn has_risky_shortcut_pattern_in_passes(passes: &[PlanningPersonaPass]) -> bool { + let mut text = String::new(); + for pass in passes { + text.push_str(&pass.intent.to_lowercase()); + text.push(' '); + text.push_str(&pass.risks.join(" ").to_lowercase()); + text.push(' '); + text.push_str(&pass.constraints.join(" ").to_lowercase()); + text.push(' '); + } + RISKY_SHORTCUT_MARKERS.iter().any(|marker| text.contains(marker)) +} + +fn has_missing_iteration_review_context(state: &ControllerState, evidence: String) -> bool { + if state.iteration == 0 { + return false; + } + let evidence = evidence.to_lowercase(); + !ITERATION_CONTEXT_MARKERS.iter().any(|marker| evidence.contains(marker)) +} + +fn personaevidence_to_text(passes: &[PlanningPersonaPass]) -> String { + let mut out = String::new(); + for pass in passes { + out.push_str(&pass.acceptance_criteria.join(" ").to_lowercase()); + out.push(' '); + out.push_str(&pass.constraints.join(" ").to_lowercase()); + out.push(' '); + out.push_str(&pass.risks.join(" ").to_lowercase()); + out.push(' '); + } + out +} + +fn emit_persona_stage_observation( + event_tx: &Sender, + stage_label: &str, + persona: &PlanningPersona, + pass: &PlanningPersonaPass, + response: &PlannerResponse, + contract: &PlanningContract, +) { + let _ = event_tx.send(AppEvent::Session(SessionEntry { + source: SessionSource::Planner, + stream: SessionStream::Status, + title: "Persona stage".to_string(), + tag: Some(stage_label.to_string()), + body: format!( + "v{} template=v{} {:?} stage={} kind={} intent=\"{}\" constraints={} risks={} acceptance={} evidence=(f:{}/a:{}/q:{})", + contract.contract_version, + PLANNING_PERSONA_TEMPLATE_VERSION, + persona, + stage_label, + response.kind, + pass.intent, + pass.constraints.len(), + pass.risks.len(), + pass.acceptance_criteria.len(), + pass.evidence.facts.len(), + pass.evidence.assumptions.len(), + pass.evidence.questions.len() + ), + run_id: repo::next_run_id(), + })); +} + +fn emit_stage_transition_observation( + event_tx: &Sender, + stage_label: &str, + persona: &PlanningPersona, + pass: &PlanningPersonaPass, + response: &PlannerResponse, + contract: &PlanningContract, +) { + let _ = event_tx.send(AppEvent::Session(SessionEntry { + source: SessionSource::Planner, + stream: SessionStream::Status, + title: stage_label.to_string(), + tag: Some(format!("v{}", contract.contract_version)), + body: format!( + "template=v{} persona={:?} kind={} constraints={} risks={} acceptance={} evidence=f:{}/a:{}/q:{} intent=\"{}\"", + PLANNING_PERSONA_TEMPLATE_VERSION, + persona, + response.kind, + pass.constraints.len(), + pass.risks.len(), + pass.acceptance_criteria.len(), + pass.evidence.facts.len(), + pass.evidence.assumptions.len(), + pass.evidence.questions.len(), + pass.intent + ), + run_id: repo::next_run_id(), + })); +} + +fn build_contract_merge_rules(contract: &PlanningContract) -> String { + if contract.conflict_rules.is_empty() { + return "No explicit conflict rules configured.".to_string(); + } + + contract + .conflict_rules + .iter() + .map(|rule| format!("- {field}: {strategy:?}", field = rule.field, strategy = rule.strategy)) + .collect::>() + .join("\n") +} + +fn is_short_term_criterion(criteria: &str) -> bool { + CORRECTNESS_ONLY_MARKERS.iter().any(|marker| criteria.contains(marker)) + || criteria.len() < 24 +} + +fn contains_maintenance_signal(criteria: &str) -> bool { + MAINTAINABILITY_MARKERS + .iter() + .any(|marker| criteria.contains(marker)) +} + +fn merge_stage_pass( + contract: &PlanningContract, + working_set: &mut StageWorkingSet, + persona: &PlanningPersona, + response: &PlannerResponse, + stage_pass: &PlanningPersonaPass, +) -> Result<()> { + for rule in &contract.conflict_rules { + match rule.field.as_str() { + "goal_md" => { + if rule.strategy == PlanningConflictStrategy::LatestStageWins { + if let Some(goal_md) = &response.goal_md { + if !goal_md.trim().is_empty() { + working_set.goal_md = goal_md.clone(); + working_set.plan_projection.goal_md_stage = persona.clone(); + } + } + } + } + "standards_md" => { + if rule.strategy == PlanningConflictStrategy::LatestStageWins { + if let Some(standards_md) = &response.standards_md { + if !standards_md.trim().is_empty() { + working_set.standards_md = standards_md.clone(); + working_set.plan_projection.standards_md_stage = persona.clone(); + } + } + } + } + "plan" => { + if rule.strategy == PlanningConflictStrategy::Replace { + if let Some(plan) = &response.plan { + working_set.plan = plan.clone(); + working_set.plan_projection.plan_stage = persona.clone(); + } + } + } + "constraints" => { + if rule.strategy == PlanningConflictStrategy::AppendUnique { + append_unique(&mut working_set.constraints, &stage_pass.constraints); + } + } + "risks" => { + if rule.strategy == PlanningConflictStrategy::AppendUnique { + append_unique(&mut working_set.risks, &stage_pass.risks); + } + } + _ => {} + } + } + + Ok(()) +} + +fn append_unique(target: &mut Vec, incoming: &[String]) { + for item in incoming { + if !item.trim().is_empty() && !target.contains(&item.trim().to_string()) { + target.push(item.trim().to_string()); + } + } +} + +fn extract_persona_pass<'a>( + response: &'a PlannerResponse, + persona: &PlanningPersona, +) -> Option { + response + .persona_passes + .iter() + .rev() + .find(|pass| &pass.persona == persona) + .cloned() +} + +fn fallback_persona_pass(persona: &PlanningPersona) -> PlanningPersonaPass { + PlanningPersonaPass { + persona: persona.clone(), + intent: "Refine the goal through a deterministic maintainable plan pass.".to_string(), + constraints: vec![], + risks: vec!["No explicit pass-level risks were returned.".to_string()], + acceptance_criteria: vec!["Maintainability and execution clarity preserved.".to_string()], + evidence: model::PlanningPersonaEvidence { + facts: vec![], + assumptions: vec![], + questions: vec![], + }, + } +} + +fn persona_instructions(persona: &PlanningPersona) -> (&'static str, &'static str, &'static str) { + match persona { + PlanningPersona::ProductOwner => ( + "product-owner", + "clarify value, non-functional constraints, and measurable outcomes", + "ask at most one question if requirement is ambiguous", + ), + PlanningPersona::SeniorEngineer => ( + "senior-engineer", + "strengthen design and implementation quality, avoiding brittle or one-off fixes", + "do not ask questions", + ), + PlanningPersona::SeniorMaintainer => ( + "senior-maintainer", + "optimize long-term iterability and reduce maintenance risk", + "do not ask questions", + ), + } +} + +pub(crate) fn canonicalize_persona_passes( + contract: &PlanningContract, + passes: Vec, +) -> Vec { + let mut canonical = Vec::new(); + for persona in &contract.ordered_personas { + if let Some(pass) = passes + .iter() + .rev() + .find(|pass| &pass.persona == persona) + .cloned() + { + if !canonical + .iter() + .any(|known: &PlanningPersonaPass| known.persona == pass.persona) + { + canonical.push(pass); + } + } + } + canonical +} + +pub(crate) fn reorder_persona_names(contract: &PlanningContract) -> Vec<&'static str> { + let mut names = Vec::new(); + for persona in &contract.ordered_personas { + names.push(match persona { + PlanningPersona::ProductOwner => "product-owner", + PlanningPersona::SeniorEngineer => "senior-engineer", + PlanningPersona::SeniorMaintainer => "senior-maintainer", + }); + } + names } #[cfg(test)] mod tests { use super::*; + use std::sync::mpsc; #[test] fn planning_schema_requires_all_declared_keys() { let schema = planning_schema(); + let legacy_schema = json!([ + "kind", + "question", + "goal_md", + "standards_md", + "plan" + ]); + let contract_schema = json!([ + "kind", + "question", + "goal_md", + "standards_md", + "plan", + "planning_contract_version", + "contract", + "persona_passes", + "single_pass_projection" + ]); + assert!(schema["anyOf"].is_array()); + assert_eq!(schema["anyOf"][0]["required"], legacy_schema); + assert_eq!(schema["anyOf"][1]["required"], contract_schema); + } + + #[test] + fn parse_planning_response_maps_legacy_payload_for_compatibility() { + let raw = r#"{ + "kind":"final", + "question":null, + "goal_md":"goal", + "standards_md":"standards", + "plan":{"version":1,"goal_summary":"goal","steps":[]} + }"#; + + let response = parse_planning_response(raw).expect("parse legacy plan"); + + assert_eq!(response.planning_contract_version, 0); + assert!(response.contract.is_some()); assert_eq!( - schema["required"], - json!(["kind", "question", "goal_md", "standards_md", "plan"]) + response + .single_pass_projection + .expect("single-pass projection") + .goal_md_stage, + crate::model::PlanningPersona::SeniorMaintainer + ); + } + + fn sample_pass( + persona: PlanningPersona, + intent: &str, + constraints: &[&str], + risks: &[&str], + ) -> PlanningPersonaPass { + PlanningPersonaPass { + persona, + intent: intent.to_string(), + constraints: constraints.iter().map(ToString::to_string).collect(), + risks: risks.iter().map(ToString::to_string).collect(), + acceptance_criteria: vec!["maintainable execution".to_string()], + evidence: model::PlanningPersonaEvidence { + facts: vec!["existing artifact".to_string()], + assumptions: vec!["no blocking dependencies".to_string()], + questions: vec![], + }, + } + } + + fn sample_plan(goal_summary: &str) -> Plan { + Plan { + version: 1, + goal_summary: goal_summary.to_string(), + steps: vec![], + } + } + + fn sample_response( + kind: &str, + goal_md: Option<&str>, + standards_md: Option<&str>, + plan: Option, + ) -> PlannerResponse { + PlannerResponse { + kind: kind.to_string(), + question: None, + goal_md: goal_md.map(str::to_string), + standards_md: standards_md.map(str::to_string), + plan, + planning_contract_version: 1, + contract: None, + persona_passes: vec![], + single_pass_projection: None, + quality_gate: model::PlanningQualityGate::default(), + } + } + + #[test] + fn canonicalize_persona_passes_preserves_contract_order() { + let contract = PlanningContract::default(); + let passes = vec![ + sample_pass( + PlanningPersona::SeniorMaintainer, + "tail pass first", + &["traceable checks"], + &["maintenance debt"], + ), + sample_pass( + PlanningPersona::ProductOwner, + "first pass", + &["value constraints"], + &["clarity debt"], + ), + sample_pass( + PlanningPersona::SeniorEngineer, + "second pass", + &["design constraints"], + &["complexity debt"], + ), + sample_pass( + PlanningPersona::ProductOwner, + "owner refinement", + &[], + &[], + ), + ]; + + let canonical = canonicalize_persona_passes(&contract, passes); + + assert_eq!(canonical.len(), 3); + assert_eq!(canonical[0].persona, PlanningPersona::ProductOwner); + assert_eq!(canonical[0].intent, "owner refinement"); + assert_eq!(canonical[1].persona, PlanningPersona::SeniorEngineer); + assert_eq!(canonical[2].persona, PlanningPersona::SeniorMaintainer); + } + + #[test] + fn merge_stage_pass_applies_conflict_rules() { + let contract = PlanningContract::default(); + let mut working_set = StageWorkingSet { + goal_md: "original goal".to_string(), + standards_md: "original standards".to_string(), + plan: sample_plan("original plan"), + constraints: vec![], + risks: vec![], + plan_projection: LegacyOutputProjection { + goal_md_stage: PlanningPersona::ProductOwner, + standards_md_stage: PlanningPersona::ProductOwner, + plan_stage: PlanningPersona::ProductOwner, + }, + }; + + let owner_pass = sample_pass( + PlanningPersona::ProductOwner, + "owner intent", + &["reuse existing abstraction", "limit coupling"], + &["coupling drift"], + ); + let owner_response = sample_response( + "final", + Some("goal v1"), + Some("standards v1"), + Some(sample_plan("plan v1")), + ); + merge_stage_pass(&contract, &mut working_set, &PlanningPersona::ProductOwner, &owner_response, &owner_pass) + .expect("owner merge"); + assert_eq!(working_set.plan_projection.goal_md_stage, PlanningPersona::ProductOwner); + assert_eq!(working_set.constraints, vec!["reuse existing abstraction", "limit coupling"]); + + let maintainer_pass = sample_pass( + PlanningPersona::SeniorMaintainer, + "maintainer intent", + &["limit coupling", "add safeguards"], + &["deprecation risk"], + ); + let maintainer_response = sample_response( + "final", + Some("goal v2"), + Some("standards v2"), + Some(sample_plan("plan v2")), + ); + merge_stage_pass( + &contract, + &mut working_set, + &PlanningPersona::SeniorMaintainer, + &maintainer_response, + &maintainer_pass, + ) + .expect("maintainer merge"); + + assert_eq!(working_set.goal_md, "goal v2"); + assert_eq!(working_set.standards_md, "standards v2"); + assert_eq!(working_set.plan.goal_summary, "plan v2"); + assert_eq!( + working_set.constraints, + vec!["reuse existing abstraction", "limit coupling", "add safeguards"] ); assert_eq!( - schema["properties"]["question"]["type"], - json!(["string", "null"]) + working_set.risks, + vec!["coupling drift", "deprecation risk"] ); - assert!(schema["properties"]["plan"]["anyOf"].is_array()); + assert_eq!( + working_set.plan_projection.plan_stage, + PlanningPersona::SeniorMaintainer + ); + } + + #[test] + fn missing_persona_pass_falls_back_to_deterministic_default() { + let response = sample_response("final", Some("goal"), Some("standards"), None); + + let fallback = extract_persona_pass(&response, &PlanningPersona::SeniorEngineer) + .unwrap_or_else(|| fallback_persona_pass(&PlanningPersona::SeniorEngineer)); + + assert_eq!(fallback.persona, PlanningPersona::SeniorEngineer); + assert!(!fallback.risks.is_empty()); + } + + #[test] + fn quality_gate_rejects_risky_shortcuts() { + let contract = PlanningContract::default(); + let mut working_set = StageWorkingSet { + goal_md: "goal".to_string(), + standards_md: "standards".to_string(), + plan: Plan { + version: 1, + goal_summary: "Refactor with quick fix".to_string(), + steps: vec![], + }, + constraints: vec!["use existing abstraction".to_string()], + risks: vec![], + plan_projection: LegacyOutputProjection { + goal_md_stage: PlanningPersona::ProductOwner, + standards_md_stage: PlanningPersona::ProductOwner, + plan_stage: PlanningPersona::ProductOwner, + }, + }; + let passes = vec![ + sample_pass( + PlanningPersona::ProductOwner, + "owner pass", + &["define ownership"], + &["correctness first"], + ), + sample_pass( + PlanningPersona::SeniorEngineer, + "engineer pass", + &["keep behavior stable"], + &["temporary workaround acceptable"], + ), + sample_pass( + PlanningPersona::SeniorMaintainer, + "maintainer pass", + &["no hardcode"], + &["avoid temporary hacks"], + ), + ]; + let gate = evaluate_quality_gate(&ControllerState::default(), &working_set, &passes); + + assert_eq!(gate.decision_code, model::PlanningQualityDecisionCode::Blocked); + assert!(gate.rationale_codes.contains(&"risky-shortcut-pattern".to_string())); + assert_eq!(gate.rationale.len(), gate.rationale_codes.len()); + } + + #[test] + fn quality_gate_allows_iteration_aware_maintainable_goals() { + let contract = PlanningContract::default(); + let mut state = ControllerState::default(); + state.iteration = 2; + let _ = contract; + + let working_set = StageWorkingSet { + goal_md: "goal".to_string(), + standards_md: "standards with iteration context".to_string(), + plan: Plan { + version: 1, + goal_summary: "Refactor interface ownership".to_string(), + steps: vec![], + }, + constraints: vec![ + "Respect module ownership boundaries".to_string(), + "Define rollback path for future iterations".to_string(), + ], + risks: vec!["coupling drift".to_string()], + plan_projection: LegacyOutputProjection { + goal_md_stage: PlanningPersona::ProductOwner, + standards_md_stage: PlanningPersona::ProductOwner, + plan_stage: PlanningPersona::ProductOwner, + }, + }; + + let passes = vec![ + sample_pass( + PlanningPersona::ProductOwner, + "owner pass", + &["define ownership", "long-term stability"], + &["no coupling"], + ), + sample_pass( + PlanningPersona::SeniorEngineer, + "engineer pass", + &["keep boundaries explicit"], + &["migration scheduling"], + ), + sample_pass( + PlanningPersona::SeniorMaintainer, + "maintainer pass", + &["review before next iteration"], + &["migration debt tracking"], + ), + ]; + let gate = evaluate_quality_gate(&state, &working_set, &passes); + + assert_eq!(gate.decision_code, model::PlanningQualityDecisionCode::Accept); + assert!(gate.rationale.contains( + &"plan passes maintainability and iteration-aware quality gates".to_string() + )); + } + + #[test] + fn build_persona_prompt_is_versioned_and_stage_ordered() { + let config = TaskConfig::default_for("longview-planner"); + let state = ControllerState::default(); + let plan = Plan::default(); + let contract = PlanningContract::default(); + + let product_prompt = build_persona_planning_prompt( + &config, + &state, + "# Goal\nShip value.", + "## Standards\n", + "Latest input", + &plan, + &contract, + &PlanningPersona::ProductOwner, + true, + ); + + assert!(product_prompt.contains( + "Chain template version: planning-persona-chain-v1" + )); + assert!(product_prompt.contains( + "Stages must always run in order: product-owner -> senior-engineer -> senior-maintainer." + )); + assert!(product_prompt.contains("Question policy:")); + assert!(product_prompt.contains("ask at most one question")); + + let engineer_prompt = build_persona_planning_prompt( + &config, + &state, + "# Goal\nShip value.", + "## Standards\n", + "Latest input", + &plan, + &contract, + &PlanningPersona::SeniorEngineer, + false, + ); + + assert!(engineer_prompt.contains("Question policy: do not ask questions")); + } + + #[test] + fn quality_gate_downgrades_when_ownership_boundaries_are_missing() { + let working_set = StageWorkingSet { + goal_md: "goal".to_string(), + standards_md: "standards".to_string(), + plan: Plan { + version: 1, + goal_summary: "Refactor command input".to_string(), + steps: vec![], + }, + constraints: vec![], + risks: vec![], + plan_projection: LegacyOutputProjection { + goal_md_stage: PlanningPersona::ProductOwner, + standards_md_stage: PlanningPersona::ProductOwner, + plan_stage: PlanningPersona::ProductOwner, + }, + }; + let passes = vec![ + sample_pass( + PlanningPersona::ProductOwner, + "owner pass", + &["clarify user outcomes"], + &["migration window"], + ), + sample_pass( + PlanningPersona::SeniorEngineer, + "engineer pass", + &["keep behavior safe"], + &["performance coupling"], + ), + sample_pass( + PlanningPersona::SeniorMaintainer, + "maintainer pass", + &["keep maintenance burden explicit"], + &["explicit module ownership debt"], + ), + ]; + let gate = evaluate_quality_gate(&ControllerState::default(), &working_set, &passes); + + assert_eq!(gate.decision_code, model::PlanningQualityDecisionCode::Downgraded); + assert!(gate.rationale_codes.contains(&"missing-ownership-boundaries".to_string())); + } + + #[test] + fn quality_gate_downgrades_when_acceptance_is_correctness_only() { + let working_set = StageWorkingSet { + goal_md: "goal".to_string(), + standards_md: "standards".to_string(), + plan: Plan { + version: 1, + goal_summary: "Refactor interface ownership".to_string(), + steps: vec![], + }, + constraints: vec!["module boundary".to_string()], + risks: vec![], + plan_projection: LegacyOutputProjection { + goal_md_stage: PlanningPersona::ProductOwner, + standards_md_stage: PlanningPersona::ProductOwner, + plan_stage: PlanningPersona::ProductOwner, + }, + }; + let passes = vec![ + PlanningPersonaPass { + persona: PlanningPersona::ProductOwner, + intent: "owner pass".to_string(), + constraints: vec!["module boundary".to_string()], + risks: vec![], + acceptance_criteria: vec!["All tests pass".to_string()], + evidence: model::PlanningPersonaEvidence { + facts: vec![], + assumptions: vec![], + questions: vec![], + }, + }, + PlanningPersonaPass { + persona: PlanningPersona::SeniorEngineer, + intent: "engineer pass".to_string(), + constraints: vec!["release latency".to_string()], + risks: vec!["build fragility".to_string()], + acceptance_criteria: vec!["Passes 10 test cases".to_string()], + evidence: model::PlanningPersonaEvidence { + facts: vec![], + assumptions: vec![], + questions: vec![], + }, + }, + PlanningPersonaPass { + persona: PlanningPersona::SeniorMaintainer, + intent: "maintainer pass".to_string(), + constraints: vec!["stability checks".to_string()], + risks: vec!["time constraint".to_string()], + acceptance_criteria: vec!["Working on existing matrix".to_string()], + evidence: model::PlanningPersonaEvidence { + facts: vec![], + assumptions: vec![], + questions: vec![], + }, + }, + ]; + + let gate = evaluate_quality_gate(&ControllerState::default(), &working_set, &passes); + + assert_eq!(gate.decision_code, model::PlanningQualityDecisionCode::Downgraded); + assert!(gate.rationale_codes.contains(&"vague-acceptance-criteria".to_string())); + assert!(gate.rationale_codes.contains(&"correctness-only-optimization".to_string())); + } + + #[test] + fn quality_gate_annotation_mentions_maintenance_guardrails_for_downgrade() { + let gate = model::PlanningQualityGate { + quality_gate_version: 1, + decision_code: model::PlanningQualityDecisionCode::Downgraded, + rationale_codes: vec![ + "vague-acceptance-criteria".to_string(), + "correctness-only-optimization".to_string(), + ], + rationale: vec![], + }; + + let annotations = quality_gate_annotations(&gate); + + assert_eq!(annotations.len(), 2); + assert!(annotations + .iter() + .any(|a| a.contains("Replace ambiguous success criteria"))); + assert!(annotations + .iter() + .any(|a| a.contains("Balance correctness goals with architecture and maintenance acceptance criteria"))); + } + + #[test] + fn emit_persona_stage_observation_captures_stage_index_and_persona() { + let (event_tx, event_rx) = mpsc::channel(); + let pass = sample_pass( + PlanningPersona::SeniorEngineer, + "engineer intent", + &["limit coupling"], + &["migration debt"], + ); + let response = sample_response("final", Some("goal"), Some("standards"), None); + + emit_persona_stage_observation( + &event_tx, + "stage-2", + &PlanningPersona::SeniorEngineer, + &pass, + &response, + &PlanningContract::default(), + ); + + let event = event_rx.recv().expect("Persona stage event"); + match event { + AppEvent::Session(entry) => { + assert_eq!(entry.title, "Persona stage"); + assert_eq!(entry.tag, Some("stage-2".to_string())); + assert!(entry.body.contains("v1")); + assert!(entry.body.contains("SeniorEngineer")); + } + _ => panic!("expected session event"), + } + + emit_stage_transition_observation( + &event_tx, + "stage-2", + &PlanningPersona::SeniorEngineer, + &pass, + &response, + &PlanningContract::default(), + ); + + let event = event_rx.recv().expect("stage-2 transition event"); + match event { + AppEvent::Session(entry) => { + assert_eq!(entry.title, "stage-2"); + assert_eq!(entry.tag, Some("v1".to_string())); + assert!(entry.body.contains("kind=final")); + assert!(entry.body.contains("SeniorEngineer")); + } + _ => panic!("expected session event"), + } } } diff --git a/src/planning/session.rs b/src/planning/session.rs index 9efdb76..1d4cba6 100644 --- a/src/planning/session.rs +++ b/src/planning/session.rs @@ -3,8 +3,7 @@ use std::sync::mpsc::Sender; use anyhow::Result; use crate::app::AppEvent; -use crate::model::{ControllerPhase, PlannerResponse, PlanningTurn, SessionSource, TaskConfig}; -use crate::process; +use crate::model::{ControllerPhase, PlannerResponse, PlanningTurn, TaskConfig}; use crate::storage::toon; pub fn advance( @@ -16,6 +15,7 @@ pub fn advance( let mut state = toon::read_state(&config.state_file)?; let goal_md = toon::read_markdown(&config.goal_file)?; let standards_md = toon::read_markdown(&config.standards_file)?; + let plan = toon::read_plan(&config.plan_file)?; state.phase = ControllerPhase::Planning; state.clear_stop_reason(); @@ -25,22 +25,16 @@ pub fn advance( }); toon::write_state(&config.state_file, &state)?; - let prompt = crate::planning::forwarder::build_planning_prompt( + let response = crate::planning::forwarder::run_planning_pipeline( + repo_root, config, + &mut state, &goal_md, &standards_md, - &state, + &plan, latest_user_input, - ); - let raw = process::run_codex_with_schema( - repo_root, - &prompt, - &crate::planning::forwarder::planning_schema(), event_tx, - SessionSource::Planner, - Some(config.controller_id()), )?; - let response = crate::planning::forwarder::parse_planning_response(&raw)?; match response.kind.as_str() { "question" => { diff --git a/src/process/codex.rs b/src/process/codex.rs index d54e1ed..a5bd8e0 100644 --- a/src/process/codex.rs +++ b/src/process/codex.rs @@ -19,6 +19,7 @@ pub fn run_codex_with_schema( repo_root: &Path, prompt: &str, schema: &Value, + model: &str, event_tx: &Sender, source: SessionSource, tag: Option, @@ -34,6 +35,8 @@ pub fn run_codex_with_schema( .arg("-C") .arg(repo_root) .arg("--full-auto") + .arg("--model") + .arg(model) .arg("--color") .arg("never") .arg("--output-schema") diff --git a/src/process/mod.rs b/src/process/mod.rs index c6acfb8..76eb50e 100644 --- a/src/process/mod.rs +++ b/src/process/mod.rs @@ -5,4 +5,4 @@ mod usage; pub use self::codex::{generate_controller_id, run_codex_with_schema}; pub use self::shell::run_shell_commands; -pub use self::usage::refresh_usage_snapshot; +pub use self::usage::{persist_usage_snapshot, refresh_usage_snapshot}; diff --git a/src/process/usage.rs b/src/process/usage.rs index fab4622..e8a7217 100644 --- a/src/process/usage.rs +++ b/src/process/usage.rs @@ -10,16 +10,29 @@ use crate::model::{ControllerState, UsageSnapshot, UsageWindow}; use crate::repo; pub fn refresh_usage_snapshot(state: &ControllerState) -> UsageSnapshot { - fetch_live_usage_snapshot().unwrap_or_else(|_| cached_usage_snapshot(state)) + match fetch_live_usage_snapshot() { + Ok(snapshot) => snapshot, + Err(error) => cached_usage_snapshot(state).unwrap_or_else(|| { + UsageSnapshot::unavailable(format!("usage fetch failed: {}", error)) + }), + } } -fn cached_usage_snapshot(state: &ControllerState) -> UsageSnapshot { +pub fn persist_usage_snapshot(state: &mut ControllerState, snapshot: &UsageSnapshot) { + state.last_usage_refresh_at = snapshot.refreshed_at.clone(); + state.last_usage_input_tokens = snapshot.input_tokens; + state.last_usage_output_tokens = snapshot.output_tokens; + state.last_usage_primary_window = snapshot.primary.clone(); + state.last_usage_secondary_window = snapshot.secondary.clone(); +} + +fn cached_usage_snapshot(state: &ControllerState) -> Option { if state.last_usage_primary_window.is_some() || state.last_usage_secondary_window.is_some() || state.last_usage_input_tokens.is_some() || state.last_usage_output_tokens.is_some() { - UsageSnapshot { + Some(UsageSnapshot { input_tokens: state.last_usage_input_tokens, output_tokens: state.last_usage_output_tokens, primary: state.last_usage_primary_window.clone(), @@ -27,9 +40,9 @@ fn cached_usage_snapshot(state: &ControllerState) -> UsageSnapshot { refreshed_at: Some(repo::now_timestamp()), available: true, note: Some("cached snapshot".to_string()), - } + }) } else { - UsageSnapshot::unavailable("codex usage unavailable") + None } } @@ -184,7 +197,7 @@ mod tests { ..ControllerState::default() }; - let snapshot = cached_usage_snapshot(&state); + let snapshot = cached_usage_snapshot(&state).expect("cached snapshot"); assert!(snapshot.available); assert_eq!( snapshot.primary.as_ref().and_then(|window| window.resets_at), @@ -202,7 +215,8 @@ mod tests { #[test] fn refresh_usage_snapshot_falls_back_when_usage_missing() { - let snapshot = cached_usage_snapshot(&ControllerState::default()); + let snapshot = cached_usage_snapshot(&ControllerState::default()) + .unwrap_or_else(|| UsageSnapshot::unavailable("codex usage unavailable")); assert!(!snapshot.available); assert_eq!(snapshot.primary, None); assert_eq!(snapshot.secondary, None); diff --git a/src/storage/toon/controllers.rs b/src/storage/toon/controllers.rs index 8ffa015..2f6c85d 100644 --- a/src/storage/toon/controllers.rs +++ b/src/storage/toon/controllers.rs @@ -58,7 +58,11 @@ pub(crate) fn list_controller_summaries_in(root: &Path) -> Result