from typing import List, Literal, Optional
from agno.agent import Agent
from agno.media import File
from agno.models.openai import OpenAIResponses
from pydantic import BaseModel, Field
# Closed vocabulary of clause labels. "other" is the catch-all for clauses
# that match none of the named categories.
ClauseCategory = Literal[
    "term_and_termination",
    "payment",
    "confidentiality",
    "indemnification",
    "limitation_of_liability",
    "warranty",
    "ip_assignment",
    "governing_law",
    "dispute_resolution",
    "non_compete",
    "other",
]
class Clause(BaseModel):
    # A single clause pulled from the contract.
    # NOTE: documented with comments, not a docstring -- pydantic copies a
    # model's docstring into its JSON schema "description", which would alter
    # the schema generated from this class.

    # One of the fixed ClauseCategory labels (required).
    category: ClauseCategory
    heading: Optional[str] = Field(
        default=None,
        description="Section heading as printed",
    )
    # Required: `...` marks the field as having no default.
    text: str = Field(..., description="Clause text, verbatim")
    page: Optional[int] = Field(
        default=None,
        description="1-indexed page where the clause begins",
    )
class Party(BaseModel):
    # A party named in the contract. Only `name` is required.
    # NOTE: comments instead of a docstring so the generated JSON schema stays
    # free of a model-level "description".
    name: str
    role: Optional[str] = Field(
        default=None,
        description="e.g. Customer, Vendor, Licensor",
    )
    address: Optional[str] = None
class Contract(BaseModel):
    # Top-level extraction result: header fields plus the list of clauses.
    # Every field is optional or defaults to an empty list, so an instance can
    # be built from a partially populated extraction.
    # NOTE: comments instead of a docstring so the generated JSON schema stays
    # free of a model-level "description".

    # --- header ---
    title: Optional[str] = None
    contract_type: Optional[str] = Field(
        default=None,
        description="e.g. MSA, SOW, NDA, EULA",
    )
    # default_factory gives each instance its own fresh list.
    parties: List[Party] = Field(default_factory=list)
    effective_date: Optional[str] = None
    term: Optional[str] = Field(
        default=None,
        description="Stated term, e.g. '3 years from Effective Date'",
    )
    governing_law: Optional[str] = None

    # --- body ---
    clauses: List[Clause] = Field(default_factory=list)
# Extraction agent: the Contract model is passed as the output schema, and the
# instructions tell the model to copy clause text verbatim and to label each
# clause with the closest category.
agent = Agent(
    model=OpenAIResponses(id="gpt-5.5"),
    # Adjacent literals are concatenated into one string; one sentence per
    # literal, with the trailing space carrying the separator.
    instructions=(
        "Extract the contract header and every clause from the attached PDF. "
        "Clause text must be verbatim from the document. "
        "Assign each clause to the closest category; use 'other' if nothing fits. "
        "Do not summarize, paraphrase, or skip clauses."
    ),
    output_schema=Contract,
)
# Run the agent once with the PDF attached by URL, then keep the `.content`
# of the run result as `contract`.
_run_output = agent.run(
    "Extract this contract.",
    files=[File(url="https://example.com/msa-acme.pdf")],
)
contract = _run_output.content

# Illustrative value of `contract`:
# Contract(title='Master Services Agreement', contract_type='MSA',
#          parties=[Party(name='Acme Corp', role='Customer'),
#                   Party(name='Beta Labs', role='Vendor')],
#          effective_date='2026-01-15', term='3 years from Effective Date',
#          governing_law='State of Delaware',
#          clauses=[Clause(category='term_and_termination', ...), ...])