Compare commits
1 Commits
main
...
fix/spec-r
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a92d56b988 |
@@ -384,8 +384,16 @@ def _verdict(v: str, feedback: dict) -> dict:
|
||||
|
||||
|
||||
def _section(text: str, name: str) -> str:
|
||||
m = re.search(rf"^##\s+{re.escape(name)}\s*\n(.*?)(?=\n##\s+|\Z)", text, re.S | re.M)
|
||||
return (m.group(1).strip() if m else "")
|
||||
# The prompt's section headers may carry a parenthesized description,
|
||||
# e.g. `## TDD Plan (list the failing tests)`. Accept an optional
|
||||
# `(...)` suffix on the section name so the post-check matches what
|
||||
# the LLM actually emits. Regression-tested in
|
||||
# tests/unit/test_phases_section.py.
|
||||
m = re.search(
|
||||
rf"^##\s+{re.escape(name)}\s*(\([^)]*\))?\s*\n(.*?)(?=\n##\s+|\Z)",
|
||||
text, re.S | re.M,
|
||||
)
|
||||
return (m.group(2).strip() if m else "")
|
||||
|
||||
|
||||
def _parse_file_scope(text: str) -> list[str]:
|
||||
|
||||
@@ -393,6 +393,83 @@ def test_refine_spec_routes_non_empty_ambiguities_to_awaiting_human():
|
||||
)
|
||||
|
||||
|
||||
def test_refine_spec_prompt_section_names_match_post_check():
|
||||
"""The spec-refiner's prompt must use section header names that the
|
||||
post-check `_section()` regex can match.
|
||||
|
||||
Bug history (2026-06-26): the prompt asked for `## Acceptance
|
||||
Criteria (numbered)` (and similar parenthesized descriptions on every
|
||||
other section), but the post-check regex was strict —
|
||||
`^##\\s+<name>\\s*\\n` rejected any parenthesized suffix. The LLM
|
||||
faithfully copied the prompt's headers into its output, the post-check
|
||||
failed to recognize them, every spec went `spec_wrong` on first
|
||||
attempt, and the cycle's loop-breaker sent it back. attempts
|
||||
incremented; eventually parked as `blocked`.
|
||||
|
||||
Fix: broaden the `_section()` regex to `\\s*(\\([^)]*\\))?\\s*\\n` so it
|
||||
accepts both bare headers AND parenthesized descriptions. The prompt
|
||||
keeps its parentheticals (they're useful hints to the LLM about what
|
||||
belongs in each section's body).
|
||||
|
||||
This test pins both sides of the contract:
|
||||
- The post-check regex is permissive (accepts parenthesized suffix).
|
||||
- The prompt's section header list is present and matches what the
|
||||
post-check looks for.
|
||||
|
||||
See: wiki/queries/damascus-orchestrator/spec-refiner-text-parsing-2026-06-26.md
|
||||
for the full gap analysis (recommendation: replace text parsing with
|
||||
Pydantic-in / JSONB-out; tracked as a follow-up story).
|
||||
"""
|
||||
phases_py = (SRC / "phases.py").read_text()
|
||||
refine_body = phases_py.split("def refine_spec", 1)[1].split("\ndef ", 1)[0]
|
||||
|
||||
# 1. The prompt must list the four sections the post-check verifies.
|
||||
# The post-check looks for: Goal, Acceptance Criteria, TDD Plan, Test Command.
|
||||
required_post_check_sections = (
|
||||
"Goal",
|
||||
"Acceptance Criteria",
|
||||
"TDD Plan",
|
||||
"Test Command",
|
||||
)
|
||||
for section in required_post_check_sections:
|
||||
assert f"## {section}" in refine_body, (
|
||||
f"spec-refiner prompt is missing '## {section}' header. "
|
||||
f"The post-check looks for these exact names; if the prompt "
|
||||
f"doesn't list them, the LLM won't emit them."
|
||||
)
|
||||
|
||||
# 2. The prompt's section headers carry parenthesized descriptions
|
||||
# (e.g. `## TDD Plan (list the failing tests)`). These are
|
||||
# intentional hints to the LLM. The post-check regex MUST be
|
||||
# permissive enough to match them — verify the regex source
|
||||
# contains the optional-suffix group.
|
||||
assert r"(\([^)]*\))?" in phases_py, (
|
||||
"The _section() regex in phases.py must contain the optional "
|
||||
"parenthesized-suffix group `(\\([^)]*\\))?` to accept headers "
|
||||
"like `## TDD Plan (list the failing tests)`. Without it, "
|
||||
"every spec fails spec_wrong (the 2026-06-26 bug)."
|
||||
)
|
||||
|
||||
# 3. The prompt's parenthetical hints should be present — they're
|
||||
# what makes the LLM produce well-formed bodies. If someone
|
||||
# strips them, the LLM may emit headers without the suffix but
|
||||
# with empty bodies (acceptable, but the hints are useful).
|
||||
expected_prompt_hints = (
|
||||
"## Acceptance Criteria (numbered)",
|
||||
"## TDD Plan (list the failing tests)",
|
||||
"## File Scope (list of paths/globs the implementation may touch)",
|
||||
"## Test Command (the exact shell command that proves done)",
|
||||
"## Ambiguities (any open questions for a human)",
|
||||
)
|
||||
for hint in expected_prompt_hints:
|
||||
assert hint in refine_body, (
|
||||
f"spec-refiner prompt missing hint '{hint}'. The "
|
||||
f"parenthesized description tells the LLM what belongs in "
|
||||
f"the section's body. The post-check regex accepts this "
|
||||
f"suffix via the optional `(\\([^)]*\\))?` group."
|
||||
)
|
||||
|
||||
|
||||
def test_refine_spec_prompt_includes_row_constraints():
|
||||
"""The spec-refiner's prompt must inject the row's declared `file_scope` and
|
||||
`budget_cycles` so the LLM produces a spec that honors the row's pre-declared
|
||||
|
||||
227
tests/unit/test_phases_section.py
Normal file
227
tests/unit/test_phases_section.py
Normal file
@@ -0,0 +1,227 @@
|
||||
"""
|
||||
Unit tests for phases.py::_section() — the spec-text parser.
|
||||
|
||||
_phases._section() extracts the body of a Markdown section by regex
|
||||
matching the section header. The orchestrator's spec-refiner uses
|
||||
this to verify the LLM-emitted spec has the required sections
|
||||
(Goal, Acceptance Criteria, TDD Plan, Test Command, etc.). If
|
||||
the regex drifts from the section names used in the prompt, every
|
||||
spec fails `spec_wrong` and burns attempts.
|
||||
|
||||
These tests pin the regex behavior so future prompt changes don't
|
||||
silently regress the post-check.
|
||||
|
||||
Run from the repo root:
|
||||
pytest tests/unit/test_phases_section.py -v
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
# Import the function under test from the orchestrator's installed
|
||||
# package. The orchestrator installs its source as `damascus` so
|
||||
# `from damascus.phases import _section` works from any CWD that
|
||||
# has the package on sys.path.
|
||||
from damascus.phases import _section
|
||||
|
||||
|
||||
def test_section_extracts_bare_section_body():
|
||||
"""A section with no parenthesized suffix extracts cleanly."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Ship a feature.\n"
|
||||
"\n"
|
||||
"## Acceptance Criteria\n"
|
||||
"1. It works.\n"
|
||||
"2. Tests pass.\n"
|
||||
)
|
||||
assert _section(text, "Goal") == "Ship a feature."
|
||||
assert _section(text, "Acceptance Criteria") == "1. It works.\n2. Tests pass."
|
||||
|
||||
|
||||
def test_section_extracts_until_next_section():
|
||||
"""Section body ends at the next `## ` header or end of text."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"first section\n"
|
||||
"## TDD Plan\n"
|
||||
"second section\n"
|
||||
)
|
||||
assert _section(text, "Goal") == "first section"
|
||||
assert _section(text, "TDD Plan") == "second section"
|
||||
|
||||
|
||||
def test_section_returns_empty_for_missing_header():
|
||||
"""No match = empty string (not raise)."""
|
||||
text = "## Goal\nShip it."
|
||||
assert _section(text, "Acceptance Criteria") == ""
|
||||
assert _section(text, "Nonexistent Section") == ""
|
||||
|
||||
|
||||
def test_section_ignores_inline_mentions():
|
||||
"""A bare mention of the section name in body text doesn't trigger."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Build the Acceptance Criteria section carefully.\n"
|
||||
)
|
||||
# The body is the Goal's body, NOT a match for "Acceptance Criteria"
|
||||
# (no `## ` prefix in the body line).
|
||||
assert _section(text, "Acceptance Criteria") == ""
|
||||
|
||||
|
||||
def test_section_handles_whitespace_variations():
|
||||
"""Multiple spaces after `##` and trailing whitespace are tolerated."""
|
||||
text = (
|
||||
"## Goal \n"
|
||||
"Ship it.\n"
|
||||
)
|
||||
# The regex's `\s+` after `##` is greedy, so multiple spaces match.
|
||||
# The `\s*` before `\n` swallows trailing whitespace.
|
||||
assert "Ship it" in _section(text, "Goal")
|
||||
|
||||
|
||||
def test_section_matches_only_at_line_start():
|
||||
"""A `## Foo` inside a code fence or quoted line is NOT matched."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Ship it.\n"
|
||||
"\n"
|
||||
" ## Inline-mention\n"
|
||||
"This is in a quote, not a real section.\n"
|
||||
)
|
||||
# Inline-mention has leading whitespace, so the `^` anchor fails.
|
||||
assert _section(text, "Inline-mention") == ""
|
||||
|
||||
|
||||
def test_section_handles_parenthesized_suffix():
|
||||
"""The regex MUST accept `## <name> (description)` suffix.
|
||||
|
||||
The spec-refiner's prompt lists section headers with parenthesized
|
||||
descriptions (e.g. `## TDD Plan (list the failing tests)`) to hint
|
||||
the LLM about what to put in the body. The LLM faithfully copies
|
||||
these into its output. The regex's optional `(\\([^)]*\\))?` group
|
||||
is what makes the post-check match them.
|
||||
|
||||
Before this broadening (2026-06-26), the strict regex `\\s*\\n`
|
||||
rejected `(numbered)` / `(list the failing tests)` and every spec
|
||||
failed `spec_wrong` on first attempt.
|
||||
|
||||
See: wiki/queries/damascus-orchestrator/spec-refiner-text-parsing-2026-06-26.md
|
||||
for the gap analysis (recommends replacing text parsing with
|
||||
Pydantic-in / JSONB-out as a follow-up).
|
||||
"""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Ship a feature.\n"
|
||||
"\n"
|
||||
"## Acceptance Criteria (numbered)\n"
|
||||
"1. Works.\n"
|
||||
"\n"
|
||||
"## TDD Plan (list the failing tests)\n"
|
||||
"- failing test 1\n"
|
||||
"- failing test 2\n"
|
||||
"\n"
|
||||
"## File Scope (list of paths/globs the implementation may touch)\n"
|
||||
"- src/foo.py\n"
|
||||
"\n"
|
||||
"## Test Command (the exact shell command that proves done)\n"
|
||||
"pytest tests/test_foo.py -v\n"
|
||||
"\n"
|
||||
"## Ambiguities (any open questions for a human)\n"
|
||||
"(none)\n"
|
||||
)
|
||||
# The regex MUST match all six sections, including the parenthesized
|
||||
# suffix on each. This is the fix for the 2026-06-26 bug.
|
||||
assert _section(text, "Goal") == "Ship a feature."
|
||||
assert _section(text, "Acceptance Criteria") == "1. Works."
|
||||
assert _section(text, "TDD Plan") == "- failing test 1\n- failing test 2"
|
||||
assert _section(text, "File Scope") == "- src/foo.py"
|
||||
assert _section(text, "Test Command") == "pytest tests/test_foo.py -v"
|
||||
assert _section(text, "Ambiguities") == "(none)"
|
||||
|
||||
|
||||
def test_section_rejects_parenthetical_in_middle_of_name():
|
||||
"""The suffix regex matches `(...)` only AFTER the section name, not
|
||||
embedded in it. `## Acceptance (numbered) Criteria` should NOT match
|
||||
`Acceptance Criteria` because the parenthetical is mid-name."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Real goal.\n"
|
||||
"\n"
|
||||
"## Acceptance (numbered) Criteria\n"
|
||||
"Should not match.\n"
|
||||
)
|
||||
assert _section(text, "Acceptance Criteria") == ""
|
||||
assert _section(text, "Goal") == "Real goal."
|
||||
|
||||
|
||||
def test_section_extracts_complex_multiline_body():
|
||||
"""A section with lists, code blocks, and sub-headings is captured whole."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Build X.\n"
|
||||
"\n"
|
||||
"Details:\n"
|
||||
"- item 1\n"
|
||||
"- item 2\n"
|
||||
"\n"
|
||||
"```bash\n"
|
||||
"echo code block\n"
|
||||
"```\n"
|
||||
"\n"
|
||||
"## Next\n"
|
||||
"Other.\n"
|
||||
)
|
||||
body = _section(text, "Goal")
|
||||
assert "Build X." in body
|
||||
assert "item 1" in body
|
||||
assert "item 2" in body
|
||||
assert "echo code block" in body
|
||||
# Should NOT include the next section
|
||||
assert "Other." not in body
|
||||
|
||||
|
||||
def test_section_required_for_spec_refiner_post_check():
|
||||
"""Integration check: all four sections the post-check requires
|
||||
extract cleanly from a well-formed spec."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"Ship the feature.\n"
|
||||
"\n"
|
||||
"## Acceptance Criteria\n"
|
||||
"1. AC1.\n"
|
||||
"2. AC2.\n"
|
||||
"\n"
|
||||
"## TDD Plan\n"
|
||||
"- failing test 1\n"
|
||||
"- failing test 2\n"
|
||||
"\n"
|
||||
"## File Scope\n"
|
||||
"- src/foo.py\n"
|
||||
"- tests/test_foo.py\n"
|
||||
"\n"
|
||||
"## Test Command\n"
|
||||
"pytest tests/test_foo.py -v\n"
|
||||
"\n"
|
||||
"## Ambiguities\n"
|
||||
"(none)\n"
|
||||
)
|
||||
# This is exactly what the post-check at phases.py:76 verifies.
|
||||
missing = [s for s in ("Goal", "Acceptance Criteria", "TDD Plan", "Test Command")
|
||||
if not _section(text, s)]
|
||||
assert missing == [], f"post-check would flag {missing} as missing"
|
||||
|
||||
|
||||
def test_section_with_extra_blank_lines_in_body():
|
||||
"""Blank lines inside a section body are preserved."""
|
||||
text = (
|
||||
"## Goal\n"
|
||||
"\n"
|
||||
"\n"
|
||||
"Ship it.\n"
|
||||
"\n"
|
||||
"## Next\n"
|
||||
"foo\n"
|
||||
)
|
||||
# The body is whitespace; `strip()` in `_section` removes leading/trailing
|
||||
# whitespace, so the result is "Ship it."
|
||||
assert _section(text, "Goal") == "Ship it."
|
||||
Reference in New Issue
Block a user