1from pydantic import BaseModel, Field, field_validator
2from typing import List, Optional
3
4
class MarkdownSplitterInput(BaseModel):
    # Request model: the Markdown text to split and the header markers that
    # delimit chunks. Documented with comments rather than a class docstring
    # because pydantic copies a model docstring into the JSON schema.

    markdown_text: str = Field(..., description="Markdown content to split")
    headers_to_split_on: List[str] = Field(
        default=["#", "##", "###", "####", "#####", "######"],
        description="Header levels to split on (e.g., ['#', '##'])",
    )
    strip_headers: bool = Field(True, description="Remove headers from chunks")

    @field_validator("headers_to_split_on")
    @classmethod
    def validate_headers(cls, v: List[str]) -> List[str]:
        """Validate header markers and return them with outer whitespace removed.

        Each entry, once trimmed, must be a non-empty run of ``#`` characters.
        Surrounding whitespace is tolerated but dropped from the stored value,
        so the field always holds the same form that was validated.

        Args:
            v: Candidate header markers, e.g. ``["#", "##"]``.

        Returns:
            The markers in their original order, each trimmed.

        Raises:
            ValueError: If an entry is empty or contains any character other
                than ``#`` after trimming (including embedded whitespace such
                as ``"# #"``).
        """
        normalized: List[str] = []
        for h in v:
            marker = h.strip()
            # strip("#") leaves a non-empty remainder iff the marker contains
            # something other than '#', which also rejects inner whitespace.
            if not marker or marker.strip("#"):
                raise ValueError(f"Invalid header format: {h}. Use '#', '##', etc.")
            normalized.append(marker)
        return normalized
24
25
class MarkdownSplitterOutput(BaseModel):
    # Result model: the produced chunks plus an optional error message.
    # Comments are used instead of a class docstring so the generated JSON
    # schema stays exactly as before.

    # Required; each chunk is a plain dict.
    chunks: List[dict] = Field(
        default=...,
        description="Processed chunks with metadata",
    )
    # Optional; defaults to None.
    error: Optional[str] = Field(
        default=None,
        description="Error details",
    )