hermes-agent-features/scripts/semantic_rle_eval.py

#!/usr/bin/env python3
"""Deterministic smoke-eval for the experimental semantic_rle context engine.

This is not an LLM quality benchmark. It checks the minimum invariant we need
before trying live Telegram replay: after cold-history compaction, the
model-visible context still contains current facts/obligations, marks stale
facts as superseded, preserves the hot tail byte-for-byte, and does not leak
raw fake secrets from cold turns.
"""

from __future__ import annotations

import argparse
import copy
import json
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable

# Directory one level above this script's own folder (``parents[1]`` of the
# resolved script path). It is prepended to ``sys.path`` if not already there,
# presumably so the ``plugins.context_engine`` import that follows resolves
# when this file is executed directly as a script.
REPO_ROOT = Path(__file__).resolve().parents[1]
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

from plugins.context_engine.semantic_rle import SemanticRLEEngine  # noqa: E402

# One chat message as a plain dict; this script itself only reads the
# "role" and "content" keys.
Message = dict[str, Any]


@dataclass(frozen=True)
class Scenario:
    """One named eval case: an input conversation plus the invariants to check.

    ``run_eval`` hands ``messages`` and ``hot_tail_messages`` to every engine
    and then calls each predicate in ``checks`` on that engine's output.
    """

    # Label reported as "scenario" in each result row.
    name: str
    # Full conversation passed to each engine.
    messages: list[Message]
    # Hot-tail size passed to each engine alongside the messages.
    hot_tail_messages: int
    # Check name -> predicate over the compacted messages; truthy means pass.
    checks: dict[str, Callable[[list[Message]], bool]]


def _render(messages: list[Message]) -> str:
    return "\n".join(str(message.get("content", "")) for message in messages)


def _hot_tail_only(messages: list[Message], hot_tail_messages: int) -> list[Message]:
    system_head = [dict(m) for m in messages if m.get("role") == "system"]
    non_system = [dict(m) for m in messages if m.get("role") != "system"]
    return [*system_head, *non_system[-hot_tail_messages:]]


def _semantic_rle(messages: list[Message], hot_tail_messages: int) -> list[Message]:
    """Compact ``messages`` with a freshly constructed ``SemanticRLEEngine``.

    A new engine is built on every call with the given ``hot_tail_messages``
    and the result of its ``compress`` method is returned as-is.
    """
    engine = SemanticRLEEngine(hot_tail_messages=hot_tail_messages)
    return engine.compress(messages)


def _scenarios() -> list[Scenario]:
    """Build the fixed set of eval scenarios.

    Returns two scenarios, each with a hot tail of two messages:

    * ``supersession_and_obligation`` - a fact is stated and later replaced;
      the checks look for the current fact, a superseded marker on the old
      one, the decision, todo and open question, and an unchanged hot tail.
    * ``cold_secret_redaction`` - fake credentials and an IP appear in early
      turns; the checks require the raw values to be absent, redaction
      markers to be present, the todo retained, and an unchanged hot tail.

    Every check is a predicate over the compacted message list.
    """
    fake_token = "sk-test1234567890abcdef1234567890"
    fake_pat = "ghp_abcdef1234567890abcdef1234567890"
    ip = "203.0.113.42"

    def says(role: str, content: str) -> Message:
        # Build one message dict with the two keys used throughout the script.
        return {"role": role, "content": content}

    def has(needle: str) -> Callable[[list[Message]], bool]:
        # Passes when ``needle`` occurs in the rendered output.
        return lambda out: needle in _render(out)

    def lacks(*needles: str) -> Callable[[list[Message]], bool]:
        # Passes when none of ``needles`` occurs in the rendered output.
        return lambda out: all(needle not in _render(out) for needle in needles)

    def tail_intact(original: list[Message]) -> Callable[[list[Message]], bool]:
        # Passes when the last two output messages equal the last two inputs.
        return lambda out: out[-2:] == original[-2:]

    supersession_messages = [
        says("system", "You are Hermes."),
        says("user", "server alpha.example is the current deployment target"),
        says("assistant", "Noted: server alpha.example."),
        says("user", "decision: use postgres for the ledger"),
        says("assistant", "I will use postgres."),
        says("user", "todo: compare misses against baseline"),
        says("assistant", "Added comparison todo."),
        says("user", "unresolved question: how often to compact?"),
        says("assistant", "We can measure compaction cadence."),
        says("user", "server beta.example is the current deployment target now"),
        says("assistant", "Switched to beta.example."),
        says("user", "hot tail user message"),
        says("assistant", "hot tail assistant message"),
    ]

    secret_messages = [
        says("system", "You are Hermes."),
        says("user", f"api_key={fake_token} server {ip}"),
        says("assistant", f"token: {fake_pat}"),
        says("user", "todo: rotate the credential reference"),
        says("assistant", "Will track credential refs only."),
        says("user", "hot tail one"),
        says("assistant", "hot tail two"),
    ]

    supersession_checks = {
        "current_fact_retained": has("server: beta.example"),
        "old_fact_marked_superseded": has("[superseded by beta.example] server: alpha.example"),
        "decision_retained": has("use postgres"),
        "obligation_retained": has("compare misses against baseline"),
        "question_retained": has("how often to compact"),
        "hot_tail_preserved": tail_intact(supersession_messages),
    }

    secret_checks = {
        "raw_fake_token_absent": lacks(fake_token, fake_pat),
        "raw_ip_absent": lacks(ip),
        "credential_ref_present": has("credential_ref:credential:"),
        "ip_redacted_marker_present": has("[REDACTED_IP]"),
        "obligation_retained": has("rotate the credential reference"),
        "hot_tail_preserved": tail_intact(secret_messages),
    }

    supersession = Scenario(
        name="supersession_and_obligation",
        messages=supersession_messages,
        hot_tail_messages=2,
        checks=supersession_checks,
    )
    redaction = Scenario(
        name="cold_secret_redaction",
        messages=secret_messages,
        hot_tail_messages=2,
        checks=secret_checks,
    )
    return [supersession, redaction]


def run_eval() -> dict[str, Any]:
    """Run every scenario through every engine and tally the invariant checks.

    Returns:
        A dict with two keys:

        * ``"results"`` - one row per (scenario, engine) pair with the keys
          ``scenario``, ``engine``, ``passed``, ``total``, ``misses`` (names
          of failed checks), ``message_count_before``,
          ``message_count_after`` and ``char_count_after``.
        * ``"summary"`` - engine name -> ``{"passed": int, "total": int}``
          summed over all scenarios.
    """
    engines: dict[str, Callable[[list[Message], int], list[Message]]] = {
        "hot_tail_only_baseline": _hot_tail_only,
        "semantic_rle": _semantic_rle,
    }
    results: list[dict[str, Any]] = []

    for scenario in _scenarios():
        for engine_name, engine_fn in engines.items():
            # Give each engine a private deep copy. The scenario's checks
            # compare the output against the original message list, so an
            # engine that mutates its input in place must not be able to
            # alter that reference or the input seen by the next engine.
            engine_input = copy.deepcopy(scenario.messages)
            compacted = engine_fn(engine_input, scenario.hot_tail_messages)
            checks = {name: bool(check(compacted)) for name, check in scenario.checks.items()}
            misses = [name for name, ok in checks.items() if not ok]
            results.append(
                {
                    "scenario": scenario.name,
                    "engine": engine_name,
                    "passed": len(checks) - len(misses),
                    "total": len(checks),
                    "misses": misses,
                    "message_count_before": len(scenario.messages),
                    "message_count_after": len(compacted),
                    "char_count_after": len(_render(compacted)),
                }
            )

    # Roll the per-scenario rows up into one pass/total pair per engine.
    by_engine: dict[str, dict[str, int]] = {}
    for row in results:
        aggregate = by_engine.setdefault(row["engine"], {"passed": 0, "total": 0})
        aggregate["passed"] += int(row["passed"])
        aggregate["total"] += int(row["total"])

    return {"results": results, "summary": by_engine}


def main() -> int:
    """Parse CLI flags, run the eval, and print the report.

    With ``--json`` the full report is printed as indented, key-sorted JSON.
    Otherwise a per-engine summary is printed, followed by one detail line
    per (scenario, engine) pair.

    Returns:
        Always ``0``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--json", action="store_true", help="print machine-readable JSON")
    options = cli.parse_args()

    report = run_eval()

    if options.json:
        print(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True))
        return 0

    print("Semantic RLE deterministic smoke-eval")
    for engine, totals in report["summary"].items():
        print(f"- {engine}: {totals['passed']}/{totals['total']} invariant checks passed")

    print("Details:")
    for entry in report["results"]:
        # An empty miss list joins to "", which falls back to "none".
        missed = ", ".join(entry["misses"]) or "none"
        detail = (
            f"- {entry['scenario']} / {entry['engine']}: "
            f"{entry['passed']}/{entry['total']}, misses={missed}, "
            f"messages {entry['message_count_before']}→{entry['message_count_after']}, "
            f"chars_after={entry['char_count_after']}"
        )
        print(detail)
    return 0


# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())