fix: slim down token usage

This commit is contained in:
eric
2026-04-04 12:37:50 +02:00
parent 97f329c825
commit 1240ab946b
55 changed files with 6799 additions and 2333 deletions

793
src/process/parser.rs Normal file
View File

@@ -0,0 +1,793 @@
use serde_json::Value;
/// One parsed line of Codex JSONL output, reduced to what the UI displays.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct ParsedCodexLine {
    // Group label for the line (e.g. "Thinking", "Command", "Output").
    pub(crate) title: String,
    // Human-readable body; may be empty for hidden/status-only events.
    pub(crate) display: String,
    // Input-side token usage summed from nested usage objects (0 when absent).
    pub(crate) input_tokens: u64,
    // Output-side token usage summed from nested usage objects (0 when absent).
    pub(crate) output_tokens: u64,
}
/// Parse one line of Codex JSONL output into a titled display entry.
///
/// Lines that are not valid JSON are passed through verbatim under the
/// generic "Output" title with zero token counts.
pub(crate) fn parse_codex_line(line: &str) -> ParsedCodexLine {
    let value = match serde_json::from_str::<Value>(line) {
        Ok(value) => value,
        Err(_) => {
            // Not JSON: surface the raw text untouched.
            return ParsedCodexLine {
                title: "Output".to_string(),
                display: line.to_string(),
                input_tokens: 0,
                output_tokens: 0,
            };
        }
    };
    // Prefer the structured event renderer; otherwise fall back to a
    // humanized type label plus whatever free text the payload carries.
    let (title, display) = match parse_codex_event(&value) {
        Some(parsed) => parsed,
        None => {
            let title = value
                .get("type")
                .and_then(Value::as_str)
                .map(humanize_type)
                .unwrap_or_else(|| "Output".to_string());
            (title, first_text(&value).unwrap_or_default())
        }
    };
    let input_tokens = collect_usage_tokens(&value, true);
    let output_tokens = collect_usage_tokens(&value, false);
    ParsedCodexLine {
        title,
        display,
        input_tokens,
        output_tokens,
    }
}
/// Map a top-level Codex event to a `(title, display)` pair, or `None`
/// when the event carries nothing the structured renderers understand.
fn parse_codex_event(value: &Value) -> Option<(String, String)> {
    let event_type = value
        .get("type")
        .and_then(Value::as_str)
        .unwrap_or_default();
    // Lifecycle events are hidden: status title with an empty body.
    if matches!(
        event_type,
        "thread.started" | "turn.started" | "turn.completed"
    ) {
        return Some(("Status".to_string(), String::new()));
    }
    if event_type == "item.started" || event_type == "item.completed" {
        let item = value.get("item")?;
        return parse_codex_item(event_type, item);
    }
    // Anything else: show embedded free text under a humanized type label.
    let display = first_text(value)?;
    Some((humanize_type(event_type), display))
}
/// Render a single `item.*` payload via the renderer matching its
/// classification; renderings that trim down to nothing are suppressed.
fn parse_codex_item(event_type: &str, item: &Value) -> Option<(String, String)> {
    let item_type = item.get("type").and_then(Value::as_str).unwrap_or_default();
    let title = classify_codex_item(item_type, item);
    let (title, display) = match title.as_str() {
        "Thinking" => render_thinking_event(item)?,
        "Command" => (title, render_command_event(event_type, item)?),
        "Patch" | "MCP" | "Plugin" => (title, render_tool_event(event_type, item)?),
        _ => (title, render_generic_item(item_type, item)?),
    };
    let display = display.trim().to_string();
    if display.is_empty() {
        None
    } else {
        Some((title, display))
    }
}
/// Render an agent "thinking" item. Thinking text that is actually a JSON
/// plan document is collapsed into a compact "Plan Update" summary.
fn render_thinking_event(item: &Value) -> Option<(String, String)> {
    let text = match item.get("text").and_then(Value::as_str) {
        Some(text) => text.to_string(),
        None => first_text(item)?,
    };
    match summarize_plan_update(&text) {
        Some(summary) => Some(("Plan Update".to_string(), summary)),
        None => Some(("Thinking".to_string(), text)),
    }
}
/// If `text` is a JSON plan (or plan delta) document, build a compact
/// three-line summary; `None` when it is ordinary thinking text.
///
/// Full-plan output shape:
///   goal <goal_summary, capped at 120 chars>
///   progress <d> done, <a> active, <t> todo, <b> blocked
///   <focused step line, or "current none">
fn summarize_plan_update(text: &str) -> Option<String> {
    let value = serde_json::from_str::<Value>(text).ok()?;
    // Delta documents are checked first and take priority over full plans.
    if let Some(summary) = summarize_plan_delta_update(&value) {
        return Some(summary);
    }
    let plan = extract_plan_value(&value)?;
    let goal_summary = plan.get("goal_summary").and_then(Value::as_str)?.trim();
    let steps = plan.get("steps").and_then(Value::as_array)?;
    // Tally step statuses; any unrecognized status counts as "todo".
    let mut done = 0usize;
    let mut active = 0usize;
    let mut todo = 0usize;
    let mut blocked = 0usize;
    for step in steps {
        match step
            .get("status")
            .and_then(Value::as_str)
            .unwrap_or_default()
        {
            "done" => done += 1,
            "active" => active += 1,
            "blocked" => blocked += 1,
            _ => todo += 1,
        }
    }
    // Focus priority: first active step, else first todo, else first blocked.
    let current = steps
        .iter()
        .find(|step| step.get("status").and_then(Value::as_str) == Some("active"))
        .or_else(|| {
            steps
                .iter()
                .find(|step| step.get("status").and_then(Value::as_str) == Some("todo"))
        })
        .or_else(|| {
            steps
                .iter()
                .find(|step| step.get("status").and_then(Value::as_str) == Some("blocked"))
        });
    let current_line = current
        .map(summarize_plan_step)
        .unwrap_or_else(|| "current none".to_string());
    Some(format!(
        "goal {}\nprogress {} done, {} active, {} todo, {} blocked\n{}",
        truncate_text(goal_summary, 120),
        done,
        active,
        todo,
        blocked,
        current_line
    ))
}
/// Summarize a plan *delta* document (step updates/removals/reordering)
/// into three lines: goal headline, change counts, and the focused step.
fn summarize_plan_delta_update(value: &Value) -> Option<String> {
    let delta = extract_plan_delta_value(value)?;
    let goal_summary = delta.get("goal_summary").and_then(Value::as_str);
    let step_updates = delta.get("step_updates").and_then(Value::as_array)?;
    let remove_step_ids = delta
        .get("remove_step_ids")
        .and_then(Value::as_array)
        .map(|items| items.len())
        .unwrap_or(0);
    let pending_step_order = delta
        .get("pending_step_order")
        .and_then(Value::as_array)
        .map(|items| items.len())
        .unwrap_or(0);
    // A null or missing goal means this delta leaves the goal unchanged.
    let headline = goal_summary
        .map(|summary| format!("goal {}", truncate_text(summary, 120)))
        .unwrap_or_else(|| "goal unchanged".to_string());
    let detail = format!(
        "updates {} steps, removes {}, reorders {}",
        step_updates.len(),
        remove_step_ids,
        pending_step_order
    );
    // Focus on the active updated step if any, else the first update.
    let focus = step_updates
        .iter()
        .find(|step| step.get("status").and_then(Value::as_str) == Some("active"))
        .or_else(|| step_updates.first())
        .map(summarize_plan_step)
        .unwrap_or_else(|| "current unchanged".to_string());
    Some(format!("{headline}\n{detail}\n{focus}"))
}
/// Locate a full plan document: either `value` itself or its `plan` child,
/// identified by the presence of `version`, `goal_summary`, and `steps`.
fn extract_plan_value(value: &Value) -> Option<&Value> {
    // Shared shape check for both the root and the nested candidate.
    fn looks_like_plan(candidate: &Value) -> bool {
        candidate.get("version").is_some()
            && candidate.get("goal_summary").is_some()
            && candidate.get("steps").is_some()
    }
    if looks_like_plan(value) {
        return Some(value);
    }
    value.get("plan").filter(|plan| looks_like_plan(plan))
}
/// Locate a plan-delta document: either `value` itself or its `plan` child,
/// identified by the presence of `step_updates` and `remove_step_ids`.
fn extract_plan_delta_value(value: &Value) -> Option<&Value> {
    // Shared shape check for both the root and the nested candidate.
    fn looks_like_delta(candidate: &Value) -> bool {
        candidate.get("step_updates").is_some() && candidate.get("remove_step_ids").is_some()
    }
    if looks_like_delta(value) {
        return Some(value);
    }
    value.get("plan").filter(|plan| looks_like_delta(plan))
}
/// One-line "<status-label> <id>: <title>" rendering for a plan step, with
/// the title capped at 96 characters.
fn summarize_plan_step(step: &Value) -> String {
    let field = |key: &str| step.get(key).and_then(Value::as_str);
    // "done"/"todo"/unknown all read as "next"; only active/blocked differ.
    let label = match field("status").unwrap_or("todo") {
        "active" => "active",
        "blocked" => "blocked",
        _ => "next",
    };
    let id = field("id").unwrap_or("unknown");
    let title = field("title").unwrap_or("untitled");
    format!("{label} {id}: {}", truncate_text(title, 96))
}
/// Trim `text` and cap it at `max_chars` characters, appending "..." when
/// it was cut. A `max_chars` of 0 disables truncation entirely.
fn truncate_text(text: &str, max_chars: usize) -> String {
    let trimmed = text.trim();
    if max_chars == 0 || trimmed.chars().count() <= max_chars {
        return trimmed.to_string();
    }
    const SUFFIX: &str = "...";
    let keep = max_chars.saturating_sub(SUFFIX.chars().count());
    let mut result: String = trimmed.chars().take(keep).collect();
    result.push_str(SUFFIX);
    result
}
/// Classify an item into a display group title.
///
/// Checks run in priority order — thinking/reasoning, patch, command, MCP,
/// generic tool, then a humanized fallback — so reordering them would change
/// how ambiguous items (e.g. an MCP-invoked command) are labeled.
fn classify_codex_item(item_type: &str, item: &Value) -> String {
    // Compare case-insensitively against both the type and the tool name.
    let item_type = item_type.to_ascii_lowercase();
    let item_name = codex_item_name(item)
        .unwrap_or_default()
        .to_ascii_lowercase();
    if item_type == "agent_message"
        || item_type.contains("reasoning")
        || item_type.contains("thought")
    {
        return "Thinking".to_string();
    }
    if item_name.contains("apply_patch") || item_type.contains("patch") {
        return "Patch".to_string();
    }
    if item_type.contains("command") || item.get("command").is_some() {
        return "Command".to_string();
    }
    if item_name.starts_with("mcp__")
        || item_type.contains("mcp")
        || item.get("server").is_some()
        || item.get("server_name").is_some()
    {
        return "MCP".to_string();
    }
    if item_type.contains("tool") {
        return "Plugin".to_string();
    }
    // Unrecognized types fall back to a humanized label ("Output" when empty).
    humanize_type(if item_type.is_empty() {
        "output"
    } else {
        item_type.as_str()
    })
}
/// Render a command-execution item.
///
/// Started events show a shell-style preview of the command. Completed
/// events show the (possibly diff-summarized, tail-truncated) output, with
/// a non-zero exit code appended; when there is no output, a bare exit code
/// or the command preview is used as a fallback.
fn render_command_event(event_type: &str, item: &Value) -> Option<String> {
    let command_preview =
        || string_field(item, &["command"]).map(|command| format_command_preview(&command));
    if event_type == "item.started" {
        return command_preview();
    }
    let raw = string_field(item, &["aggregated_output", "output", "result"])
        .unwrap_or_default()
        .trim()
        .to_string();
    let exit_code = item.get("exit_code").and_then(Value::as_i64);
    let summarized = summarize_diff_output(&raw).unwrap_or(raw);
    let output = truncate_command_output(&summarized, 6);
    if !output.is_empty() {
        return match exit_code {
            Some(code) if code != 0 => Some(format!("{output}\nexit {code}")),
            _ => Some(output),
        };
    }
    match exit_code {
        Some(code) => Some(format!("exit {code}")),
        None => command_preview(),
    }
}
/// Shell-style ($-prefixed) preview of a command, with heredoc bodies
/// redacted and the text capped at 6 lines / 240 characters.
fn format_command_preview(command: &str) -> String {
    let mut preview = summarize_heredoc_command(command.trim());
    preview = truncate_command_preview_lines(&preview, 6);
    preview = truncate_command_preview_chars(&preview, 240);
    format!("$ {preview}")
}
/// Redact heredoc bodies in `command`, replacing each with a one-line
/// "... N heredoc lines omitted" marker.
///
/// Note: the terminator line is consumed along with the body, so only the
/// opener line survives in the preview.
fn summarize_heredoc_command(command: &str) -> String {
    let mut summarized = Vec::new();
    let mut lines = command.lines().peekable();
    while let Some(line) = lines.next() {
        summarized.push(line.to_string());
        // Lines without a heredoc opener pass through unchanged.
        let Some(delimiter) = heredoc_delimiter(line) else {
            continue;
        };
        // Consume the heredoc body up to and including its terminator,
        // counting only body lines as omitted.
        let mut omitted = 0usize;
        while let Some(next_line) = lines.peek().copied() {
            lines.next();
            if is_heredoc_terminator(next_line, &delimiter) {
                break;
            }
            omitted += 1;
        }
        if omitted > 0 {
            summarized.push(format!("... {omitted} heredoc lines omitted"));
        }
    }
    summarized.join("\n")
}
/// Extract the heredoc delimiter from a shell line containing `<<`, if any.
///
/// Handles the `<<-` dash variant, quoted delimiters (`<<'EOF'`, `<<"EOF"`),
/// and bare delimiters with trailing shell punctuation stripped.
fn heredoc_delimiter(line: &str) -> Option<String> {
    let after_marker = &line[line.find("<<")? + 2..];
    let body = after_marker
        .strip_prefix('-')
        .unwrap_or(after_marker)
        .trim_start();
    let first = body.chars().next()?;
    if first == '\'' || first == '"' {
        // Quoted delimiter: take everything up to the matching close quote.
        let inner = &body[first.len_utf8()..];
        let close = inner.find(first)?;
        return Some(inner[..close].to_string());
    }
    let token = body
        .split_whitespace()
        .next()?
        .trim_matches(|ch: char| matches!(ch, ';' | ')' | '(' | '"' | '\''));
    if token.is_empty() {
        None
    } else {
        Some(token.to_string())
    }
}
/// True when `line` terminates the heredoc opened with `delimiter`,
/// allowing only trailing shell punctuation/quotes after the delimiter.
fn is_heredoc_terminator(line: &str, delimiter: &str) -> bool {
    let trimmed = line.trim();
    if trimmed == delimiter {
        return true;
    }
    match trimmed.strip_prefix(delimiter) {
        Some(rest) => rest
            .chars()
            .all(|ch| matches!(ch, '"' | '\'' | ';' | ')' | '(')),
        None => false,
    }
}
/// Keep at most the first `max_lines` lines of a command preview, noting
/// how many lines were dropped. `max_lines == 0` yields an empty string.
fn truncate_command_preview_lines(command: &str, max_lines: usize) -> String {
    if max_lines == 0 || command.is_empty() {
        return String::new();
    }
    let all: Vec<&str> = command.lines().collect();
    if all.len() <= max_lines {
        return command.to_string();
    }
    let shown = all[..max_lines].join("\n");
    let hidden = all.len() - max_lines;
    format!("{shown}\n... {hidden} more command lines omitted")
}
/// Cap a command preview at `max_chars` characters, replacing the cut tail
/// with a fixed truncation marker. `max_chars == 0` disables the cap.
fn truncate_command_preview_chars(command: &str, max_chars: usize) -> String {
    const SUFFIX: &str = "... command text truncated";
    if max_chars == 0 || command.chars().count() <= max_chars {
        return command.to_string();
    }
    let keep = max_chars.saturating_sub(SUFFIX.chars().count());
    if keep == 0 {
        // Budget smaller than the marker: show as much of the marker as fits.
        return SUFFIX.chars().take(max_chars).collect();
    }
    let mut truncated: String = command.chars().take(keep).collect();
    truncated.push_str(SUFFIX);
    truncated
}
/// Keep only the last `max_lines` lines of command output, prefixing a
/// note about how many earlier lines were dropped. `max_lines == 0` yields
/// an empty string.
fn truncate_command_output(output: &str, max_lines: usize) -> String {
    if max_lines == 0 || output.is_empty() {
        return String::new();
    }
    let all: Vec<&str> = output.lines().collect();
    if all.len() <= max_lines {
        return output.to_string();
    }
    let hidden = all.len() - max_lines;
    let tail = all[hidden..].join("\n");
    format!("... {hidden} earlier lines omitted\n{tail}")
}
/// Render a patch/MCP/plugin tool item: the tool name when started,
/// otherwise its textual output (diff-summarized when it looks like a
/// unified diff), or a non-"completed" status as a last resort.
fn render_tool_event(event_type: &str, item: &Value) -> Option<String> {
    let name = codex_item_name(item).unwrap_or_else(|| "tool".to_string());
    if event_type == "item.started" {
        return Some(name);
    }
    let output = string_field(
        item,
        &["output", "result", "aggregated_output", "summary", "text"],
    )
    .map(|text| text.trim().to_string())
    .unwrap_or_default();
    if !output.is_empty() {
        let rendered = summarize_diff_output(&output).unwrap_or(output);
        return Some(rendered);
    }
    let status = string_field(item, &["status"]).unwrap_or_default();
    (!status.is_empty() && status != "completed").then_some(status)
}
/// Fallback rendering: file-change items show their path; everything else
/// shows the first usable piece of free text found in the payload.
fn render_generic_item(item_type: &str, item: &Value) -> Option<String> {
    if item_type.eq_ignore_ascii_case("file_change") {
        string_field(item, &["path", "file_path", "filename", "file"])
    } else {
        first_text(item)
    }
}
/// Per-file tally of a unified diff: path plus added/removed line counts.
#[derive(Debug, Clone, PartialEq, Eq)]
struct DiffFileSummary {
    // Path taken from the `diff --git` header (prefers the `b/` side).
    path: String,
    // Count of `+` lines (excluding the `+++` header).
    added: usize,
    // Count of `-` lines (excluding the `---` header).
    removed: usize,
}
/// Collapse unified-diff text into "edited <path> +A -R" lines, one per
/// file; `None` when the text is not recognizable as a diff.
fn summarize_diff_output(text: &str) -> Option<String> {
    let lines: Vec<String> = summarize_unified_diff(text)?
        .into_iter()
        .map(|file| format!("edited {} +{} -{}", file.path, file.added, file.removed))
        .collect();
    Some(lines.join("\n"))
}
/// Scan `text` as a unified diff, producing one `DiffFileSummary` per
/// `diff --git` header. Returns `None` when no header was seen (the text
/// is not a diff) or no file sections were collected.
fn summarize_unified_diff(text: &str) -> Option<Vec<DiffFileSummary>> {
    // `current` accumulates counts for the file section being scanned.
    let mut current: Option<DiffFileSummary> = None;
    let mut summaries = Vec::new();
    let mut saw_diff_marker = false;
    for line in text.lines() {
        if let Some(path) = diff_file_path(line) {
            saw_diff_marker = true;
            // New file header: flush the previous section, start fresh counts.
            if let Some(summary) = current.take() {
                summaries.push(summary);
            }
            current = Some(DiffFileSummary {
                path,
                added: 0,
                removed: 0,
            });
            continue;
        }
        // Lines before the first header are ignored entirely.
        let Some(summary) = current.as_mut() else {
            continue;
        };
        // Skip file headers and hunk markers so they don't count as +/-.
        if line.starts_with("+++ ") || line.starts_with("--- ") || line.starts_with("@@") {
            continue;
        }
        if line.starts_with('+') {
            summary.added += 1;
        } else if line.starts_with('-') {
            summary.removed += 1;
        }
    }
    // Flush the trailing file section.
    if let Some(summary) = current.take() {
        summaries.push(summary);
    }
    if !saw_diff_marker || summaries.is_empty() {
        None
    } else {
        Some(summaries)
    }
}
/// Pull the target path out of a `diff --git a/... b/...` header line,
/// stripping the `b/` (or `a/`) prefix when present.
fn diff_file_path(line: &str) -> Option<String> {
    let mut parts = line.strip_prefix("diff --git ")?.split_whitespace();
    parts.next()?; // old-side path, ignored
    let new_side = parts.next()?;
    let path = new_side
        .strip_prefix("b/")
        .or_else(|| new_side.strip_prefix("a/"))
        .unwrap_or(new_side);
    Some(path.to_string())
}
/// Best-effort tool/server name for an item, checking common key spellings
/// in priority order.
fn codex_item_name(item: &Value) -> Option<String> {
    const NAME_KEYS: [&str; 5] = ["tool_name", "tool", "name", "server_name", "server"];
    string_field(item, &NAME_KEYS)
}
/// First of `keys` present on `value` as an owned string; `None` for
/// non-objects or when no listed key holds a string.
fn string_field(value: &Value, keys: &[&str]) -> Option<String> {
    let map = value.as_object()?;
    for key in keys {
        if let Some(text) = map.get(*key).and_then(Value::as_str) {
            return Some(text.to_string());
        }
    }
    None
}
/// Turn a machine event type like `item.completed` or `agent_message` into
/// a title-cased label ("Item Completed", "Agent Message"), splitting on
/// `-`, `_`, and `.` and dropping empty segments.
fn humanize_type(raw: &str) -> String {
    let words: Vec<String> = raw
        .split(['-', '_', '.'])
        .filter(|part| !part.is_empty())
        .map(|part| {
            let mut chars = part.chars();
            chars.next().map_or_else(String::new, |first| {
                format!("{}{}", first.to_ascii_uppercase(), chars.as_str())
            })
        })
        .collect();
    words.join(" ")
}
/// Depth-first search for the first human-readable string in `value`.
///
/// Objects are probed via the preferred message keys before recursing into
/// their remaining fields; "id"/"type"/"status" entries are never recursed
/// into, and synthetic `item_<digits>` identifiers are never returned.
fn first_text(value: &Value) -> Option<String> {
    match value {
        Value::String(text) => (!is_internal_item_id(text)).then(|| text.clone()),
        Value::Array(items) => items.iter().find_map(first_text),
        Value::Object(map) => {
            // Preferred message-bearing keys, in priority order.
            for key in ["msg", "message", "text", "content", "summary"] {
                if let Some(text) = map.get(key).and_then(Value::as_str) {
                    if !is_internal_item_id(text) {
                        return Some(text.to_string());
                    }
                }
            }
            // Otherwise recurse into any other field that may hold text.
            map.iter()
                .filter(|(key, _)| !matches!(key.as_str(), "id" | "type" | "status"))
                .find_map(|(_, value)| first_text(value))
        }
        _ => None,
    }
}
/// True for synthetic item identifiers of the form `item_<digits>`, which
/// should never surface as display text.
fn is_internal_item_id(text: &str) -> bool {
    match text.strip_prefix("item_") {
        Some(digits) => !digits.is_empty() && digits.bytes().all(|b| b.is_ascii_digit()),
        None => false,
    }
}
/// Recursively sum token counts from every usage-style object nested
/// anywhere in `value`.
///
/// When `input` is true, input-side keys (`input_tokens`,
/// `input_token_count`, `prompt_tokens`) are counted; otherwise output-side
/// keys (`output_tokens`, `output_token_count`, `completion_tokens`).
/// Non-numeric values under a matching key contribute 0; scalars and nulls
/// contribute 0.
fn collect_usage_tokens(value: &Value, input: bool) -> u64 {
    // Hoisted out of the per-entry loop: the key set depends only on `input`,
    // so rebuilding it for every object field was loop-invariant work.
    let usage_keys: [&str; 3] = if input {
        ["input_tokens", "input_token_count", "prompt_tokens"]
    } else {
        ["output_tokens", "output_token_count", "completion_tokens"]
    };
    match value {
        Value::Array(items) => items
            .iter()
            .map(|item| collect_usage_tokens(item, input))
            .sum(),
        Value::Object(map) => map
            .iter()
            .map(|(key, child)| {
                if usage_keys.contains(&key.as_str()) {
                    // A matching key is a leaf counter; do not recurse into it.
                    child.as_u64().unwrap_or_default()
                } else {
                    collect_usage_tokens(child, input)
                }
            })
            .sum(),
        _ => 0,
    }
}
/// Filter for known-noisy Codex stderr lines that should not be surfaced
/// to the user.
pub(crate) fn should_ignore_codex_stderr(line: &str) -> bool {
    const SKILLS_NOISE: &str = "failed to stat skills entry";
    const STDIN_NOTICE: &str = "Reading additional input from stdin...";
    line.contains(SKILLS_NOISE) || line.trim() == STDIN_NOTICE
}
// Unit tests covering stderr filtering, item classification, plan
// summarization, command previews, diff summarization, output truncation,
// and token accounting. Each test feeds a raw JSONL string through the
// public entry points.
#[cfg(test)]
mod tests {
    use super::*;

    // Known benign stderr lines are suppressed; real errors pass through.
    #[test]
    fn filters_known_codex_stderr_noise() {
        assert!(should_ignore_codex_stderr(
            "2026-04-04T03:43:38Z ERROR codex_core_skills::loader: failed to stat skills entry /tmp/foo"
        ));
        assert!(should_ignore_codex_stderr(
            "Reading additional input from stdin..."
        ));
        assert!(!should_ignore_codex_stderr("actual planner failure"));
    }

    // agent_message items are grouped under "Thinking".
    #[test]
    fn parses_agent_messages_as_thinking() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"Planning next step."}}"#,
        );
        assert_eq!(parsed.title, "Thinking");
        assert_eq!(parsed.display, "Planning next step.");
    }

    // A full plan document embedded in thinking text becomes a summary.
    #[test]
    fn summarizes_plan_json_thinking_as_plan_update() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"{\"version\":5,\"goal_summary\":\"Refactor remaining modules without changing behavior\",\"steps\":[{\"id\":\"guardrails\",\"title\":\"Add Refactor Guardrails\",\"status\":\"done\"},{\"id\":\"process-modules\",\"title\":\"Split Process Execution And Parsing\",\"status\":\"todo\"}]}"}}"#,
        );
        assert_eq!(parsed.title, "Plan Update");
        assert_eq!(
            parsed.display,
            "goal Refactor remaining modules without changing behavior\nprogress 1 done, 0 active, 1 todo, 0 blocked\nnext process-modules: Split Process Execution And Parsing"
        );
    }

    // Plans nested under a "plan" key are also recognized.
    #[test]
    fn summarizes_nested_plan_json_thinking_as_plan_update() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"{\"kind\":\"final\",\"plan\":{\"version\":5,\"goal_summary\":\"Ship planner cleanup\",\"steps\":[{\"id\":\"planner-cleanup\",\"title\":\"Trim Planner Output\",\"status\":\"active\"}]}}"}}"#,
        );
        assert_eq!(parsed.title, "Plan Update");
        assert_eq!(
            parsed.display,
            "goal Ship planner cleanup\nprogress 0 done, 1 active, 0 todo, 0 blocked\nactive planner-cleanup: Trim Planner Output"
        );
    }

    // Plan deltas (step updates/removals/reordering) get their own summary.
    #[test]
    fn summarizes_plan_delta_thinking_as_plan_update() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_0","type":"agent_message","text":"{\"goal_summary\":null,\"step_updates\":[{\"id\":\"process-modules\",\"title\":\"Split Process Execution And Parsing\",\"status\":\"active\"}],\"remove_step_ids\":[\"old-step\"],\"pending_step_order\":[\"process-modules\"]}"}}"#,
        );
        assert_eq!(parsed.title, "Plan Update");
        assert_eq!(
            parsed.display,
            "goal unchanged\nupdates 1 steps, removes 1, reorders 1\nactive process-modules: Split Process Execution And Parsing"
        );
    }

    // Started events show a "$ command" preview; completed show the output.
    #[test]
    fn parses_command_events_into_command_groups() {
        let started = parse_codex_line(
            r#"{"type":"item.started","item":{"id":"item_1","type":"command_execution","command":"/bin/zsh -lc pwd","status":"in_progress"}}"#,
        );
        let completed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_1","type":"command_execution","command":"/bin/zsh -lc pwd","aggregated_output":"/tmp/demo\n","exit_code":0,"status":"completed"}}"#,
        );
        assert_eq!(started.title, "Command");
        assert_eq!(started.display, "$ /bin/zsh -lc pwd");
        assert_eq!(completed.title, "Command");
        assert_eq!(completed.display, "/tmp/demo");
    }

    // Previews keep at most 6 lines and note the omission count.
    #[test]
    fn truncates_multiline_started_command_previews() {
        let parsed = parse_codex_line(
            r#"{"type":"item.started","item":{"id":"item_10","type":"command_execution","command":"/bin/zsh -lc \"line1\nline2\nline3\nline4\nline5\nline6\nline7\"","status":"in_progress"}}"#,
        );
        assert_eq!(
            parsed.display,
            "$ /bin/zsh -lc \"line1\nline2\nline3\nline4\nline5\nline6\n... 1 more command lines omitted"
        );
    }

    // Heredoc bodies never leak into command previews.
    #[test]
    fn redacts_heredoc_bodies_in_command_previews() {
        let parsed = parse_codex_line(
            r#"{"type":"item.started","item":{"id":"item_11","type":"command_execution","command":"cat > /tmp/demo.rs <<'EOF'\nfirst\nsecond\nEOF\ncargo fmt","status":"in_progress"}}"#,
        );
        assert!(parsed.display.contains("$ cat > /tmp/demo.rs <<'EOF'"));
        assert!(parsed.display.contains("... 2 heredoc lines omitted"));
        assert!(parsed.display.contains("cargo fmt"));
        assert!(!parsed.display.contains("first\nsecond"));
    }

    // Usage objects are summed wherever they appear in the event tree.
    #[test]
    fn parses_tool_events_and_nested_usage_tokens() {
        let started = parse_codex_line(
            r#"{"type":"item.started","item":{"id":"item_2","type":"mcp_tool_call","tool_name":"mcp__playwright__browser_snapshot"}}"#,
        );
        let completed = parse_codex_line(
            r#"{"type":"item.completed","usage":{"prompt_tokens":5},"item":{"id":"item_2","type":"patch_apply","tool_name":"apply_patch","output":"updated file","usage":{"completion_tokens":9}}}"#,
        );
        assert_eq!(started.title, "MCP");
        assert_eq!(started.display, "mcp__playwright__browser_snapshot");
        assert_eq!(completed.title, "Patch");
        assert_eq!(completed.display, "updated file");
        assert_eq!(completed.input_tokens, 5);
        assert_eq!(completed.output_tokens, 9);
    }

    // Unified diffs in command output collapse to per-file +/- counts.
    #[test]
    fn summarizes_unified_diff_output_by_file() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_3","type":"command_execution","aggregated_output":"diff --git a/src/main.rs b/src/main.rs\nindex 1111111..2222222 100644\n--- a/src/main.rs\n+++ b/src/main.rs\n@@ -1,3 +1,4 @@\n keep\n-old\n+new\n+extra\ndiff --git a/src/lib.rs b/src/lib.rs\nindex 3333333..4444444 100644\n--- a/src/lib.rs\n+++ b/src/lib.rs\n@@ -2,2 +2,1 @@\n-remove\n stay\n","exit_code":0,"status":"completed"}}"#,
        );
        assert_eq!(
            parsed.display,
            "edited src/main.rs +2 -1\nedited src/lib.rs +0 -1"
        );
    }

    // Plain output (no diff headers) is shown verbatim.
    #[test]
    fn leaves_non_diff_command_output_unchanged() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_4","type":"command_execution","aggregated_output":"plain output\nsecond line","exit_code":0,"status":"completed"}}"#,
        );
        assert_eq!(parsed.display, "plain output\nsecond line");
    }

    // Long output keeps the most recent 6 lines, dropping earlier ones.
    #[test]
    fn truncates_long_command_output_to_latest_lines() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_5","type":"command_execution","aggregated_output":"l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8","exit_code":0,"status":"completed"}}"#,
        );
        assert_eq!(
            parsed.display,
            "... 2 earlier lines omitted\nl3\nl4\nl5\nl6\nl7\nl8"
        );
    }

    // Synthetic item_<digits> ids never surface as display text.
    #[test]
    fn ignores_internal_item_ids_in_generic_output() {
        let parsed = parse_codex_line(
            r#"{"type":"item.completed","item":{"id":"item_21","type":"file_change","status":"completed"}}"#,
        );
        assert!(parsed.display.is_empty());
    }

    // Lifecycle events render nothing but still contribute token usage.
    #[test]
    fn hides_turn_lifecycle_output_but_keeps_usage() {
        let parsed = parse_codex_line(
            r#"{"type":"turn.completed","usage":{"input_tokens":12,"output_tokens":34}}"#,
        );
        assert!(parsed.display.is_empty());
        assert_eq!(parsed.input_tokens, 12);
        assert_eq!(parsed.output_tokens, 34);
    }
}