"""Tests for scripts/semantic_rle_eval.py."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import importlib.util
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
|
|
def _load_eval_module():
    """Load ``scripts/semantic_rle_eval.py`` from its file path and return the module.

    The script is located relative to this test file (``parents[2]`` of the
    resolved path, then ``scripts/semantic_rle_eval.py``) and imported under
    the name ``semantic_rle_eval`` without requiring ``scripts`` to be a
    package or on ``sys.path``.

    Returns:
        The executed module object.

    Raises:
        AssertionError: if no import spec/loader could be built for the path.
        Exception: whatever the script raises while executing (or
            ``FileNotFoundError`` if it does not exist); in that case the
            ``sys.modules`` entry is rolled back before re-raising.
    """
    path = Path(__file__).resolve().parents[2] / "scripts" / "semantic_rle_eval.py"
    spec = importlib.util.spec_from_file_location("semantic_rle_eval", path)
    assert spec and spec.loader, f"cannot build an import spec for {path}"
    module = importlib.util.module_from_spec(spec)

    # Register before executing (as in the importlib "import a source file"
    # recipe) so code that looks the module up by name during import works.
    missing = object()
    previous = sys.modules.get("semantic_rle_eval", missing)
    sys.modules["semantic_rle_eval"] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module importable by later code:
        # put sys.modules back the way it was before this call.
        if sys.modules.get("semantic_rle_eval") is module:
            if previous is missing:
                del sys.modules["semantic_rle_eval"]
            else:
                sys.modules["semantic_rle_eval"] = previous
        raise
    return module
|
|
|
|
|
|
def test_semantic_rle_eval_reports_baseline_misses_and_semantic_passes():
    """Check the eval report: semantic RLE passes everything, the baseline does not.

    Runs ``run_eval()`` from the loaded eval script and verifies that:

    * the ``semantic_rle`` summary is exactly 12 passed out of 12;
    * the ``hot_tail_only_baseline`` summary reports fewer than 12 passed;
    * there is at least one ``semantic_rle`` result row and every such row
      has an empty ``misses`` list;
    * the misses collected across ``hot_tail_only_baseline`` rows include
      ``current_fact_retained``, ``obligation_retained`` and
      ``credential_ref_present``.
    """
    module = _load_eval_module()

    report = module.run_eval()

    assert report["summary"]["semantic_rle"] == {"passed": 12, "total": 12}
    assert report["summary"]["hot_tail_only_baseline"]["passed"] < 12

    semantic_rows = [row for row in report["results"] if row["engine"] == "semantic_rle"]
    # all() is vacuously true for an empty list, so make sure there is
    # something to check before asserting that no row has misses.
    assert semantic_rows, "report contains no semantic_rle result rows"
    assert all(row["misses"] == [] for row in semantic_rows)

    # Union of every miss reported by any baseline row.
    baseline_misses = {
        miss
        for row in report["results"]
        if row["engine"] == "hot_tail_only_baseline"
        for miss in row["misses"]
    }
    assert "current_fact_retained" in baseline_misses
    assert "obligation_retained" in baseline_misses
    assert "credential_ref_present" in baseline_misses
|