# File: spore-commons/node/spore_node/governance/parser.py
# (repository viewer metadata: 68 lines, 1.7 KiB, Python)

"""Parse governance documents with YAML frontmatter.
Expected format:
---
doc_id: spore.governance.consent
doc_kind: protocol
status: active
depends_on:
- spore.governance.membrane
- spore.governance.holons
---
# Document Title
Body content here...
"""
import frontmatter
from pydantic import BaseModel
class GovernanceDoc(BaseModel):
    """A governance document: selected frontmatter fields plus the markdown body."""

    # Required fields: no default, so they must be supplied on construction.
    doc_id: str
    doc_kind: str

    # Optional fields with their defaults.
    title: str = ""
    status: str = "draft"
    depends_on: list[str] = []
    body: str = ""
    # Note: inside this class body the name shadows the imported
    # ``frontmatter`` module; module-level code is unaffected.
    frontmatter: dict = {}
def parse_governance_doc(content: str) -> GovernanceDoc:
    """Parse a markdown document with YAML frontmatter into a GovernanceDoc.

    The keys ``doc_id``, ``doc_kind``, ``status``, ``depends_on`` and
    ``title`` are lifted out of the frontmatter into dedicated fields; any
    remaining keys are passed through in ``frontmatter``.

    Args:
        content: Full document text (frontmatter block followed by the body).

    Returns:
        The populated GovernanceDoc. ``doc_kind`` defaults to ``"document"``
        and ``status`` to ``"draft"`` when absent. If no ``title`` is given,
        the first line of the body starting with ``"# "`` supplies it.

    Raises:
        ValueError: If ``doc_id`` is missing or empty.
    """
    post = frontmatter.loads(content)
    # Work on a copy so popping the known keys leaves ``post.metadata`` intact.
    meta = dict(post.metadata)

    doc_id = meta.pop("doc_id", "")
    if not doc_id:
        raise ValueError("Missing required frontmatter field: doc_id")

    doc_kind = meta.pop("doc_kind", "document")
    status = meta.pop("status", "draft")
    depends_on = meta.pop("depends_on", None)
    title = meta.pop("title", "")

    # A bare ``depends_on:`` key parses as YAML null. Treat it like a missing
    # key instead of wrapping it as ``[None]``, which fails ``list[str]``
    # validation. A single scalar is still promoted to a one-element list.
    if depends_on is None:
        depends_on = []
    elif not isinstance(depends_on, list):
        depends_on = [depends_on]

    # Extract title from first heading if not in frontmatter
    if not title:
        for line in post.content.split("\n"):
            stripped = line.strip()
            if stripped.startswith("# "):
                title = stripped[2:].strip()
                break

    return GovernanceDoc(
        doc_id=doc_id,
        doc_kind=doc_kind,
        title=title,
        status=status,
        depends_on=depends_on,
        body=post.content,
        frontmatter=meta,
    )
def parse_many(contents: list[str]) -> list[GovernanceDoc]:
    """Parse every document text in ``contents``, keeping the input order.

    Each item is handed to ``parse_governance_doc``; any exception it raises
    propagates to the caller.
    """
    return list(map(parse_governance_doc, contents))