hermes-agent-features/tests/scripts/test_semantic_rle_eval.py

40 lines
1.3 KiB
Python

"""Tests for scripts/semantic_rle_eval.py."""
from __future__ import annotations
import importlib.util
import sys
from pathlib import Path
def _load_eval_module():
    """Load ``scripts/semantic_rle_eval.py`` from its file path and return the module.

    The script is located relative to this test file (``parents[2]`` of the
    resolved path, then ``scripts/semantic_rle_eval.py``) and imported under
    the name ``semantic_rle_eval``.

    Returns:
        The executed module object, also registered in ``sys.modules``.

    Raises:
        AssertionError: if no import spec (or loader) can be built for the path.
        Exception: whatever executing the script raises (for example
            ``FileNotFoundError`` when the script is missing); in that case the
            ``sys.modules`` entry added here is removed again.
    """
    path = Path(__file__).resolve().parents[2] / "scripts" / "semantic_rle_eval.py"
    spec = importlib.util.spec_from_file_location("semantic_rle_eval", path)
    assert spec is not None and spec.loader is not None, (
        f"cannot build an import spec for {path}"
    )
    module = importlib.util.module_from_spec(spec)
    # Register before executing, following the importlib "import a source file
    # directly" recipe, so the name is resolvable while the script body runs.
    sys.modules["semantic_rle_eval"] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Mirror the regular import machinery: a module whose execution failed
        # must not stay registered, or later lookups get a half-initialised
        # object. Only drop the entry if it is still the one installed above.
        if sys.modules.get("semantic_rle_eval") is module:
            del sys.modules["semantic_rle_eval"]
        raise
    return module
def test_semantic_rle_eval_reports_baseline_misses_and_semantic_passes():
    """Check the report returned by the eval script's ``run_eval()``.

    Asserts that the ``semantic_rle`` summary is 12 passed out of 12, that the
    ``hot_tail_only_baseline`` summary has fewer than 12 passes, that every
    ``semantic_rle`` result row has an empty ``misses`` list, and that the
    baseline rows collectively miss three specific checks.
    """
    report = _load_eval_module().run_eval()

    # Summary-level expectations for both engines.
    summary = report["summary"]
    assert summary["semantic_rle"] == {"passed": 12, "total": 12}
    assert summary["hot_tail_only_baseline"]["passed"] < 12

    # Row-level: no semantic_rle row may report a miss.
    results = report["results"]
    semantic_entries = list(
        filter(lambda entry: entry["engine"] == "semantic_rle", results)
    )
    assert all(entry["misses"] == [] for entry in semantic_entries)

    # Row-level: union of everything the baseline engine missed.
    missed_by_baseline = set()
    for entry in results:
        if entry["engine"] == "hot_tail_only_baseline":
            missed_by_baseline.update(entry["misses"])

    for expected_miss in (
        "current_fact_retained",
        "obligation_retained",
        "credential_ref_present",
    ):
        assert expected_miss in missed_by_baseline