"""Pydantic request/response models for AutoIndexer API."""

from __future__ import annotations

from typing import Any, Literal, Optional

from pydantic import BaseModel


class ProjectContext(BaseModel):
    """Project-level context that request models in this module can carry.

    Both fields are optional on input and fall back to empty values.
    """

    # Free-text description; empty string when not supplied.
    description: str = ""
    # List of research-question strings; empty list when not supplied.
    research_questions: list[str] = []


class IndexerField(BaseModel):
    """Definition of a single field to extract.

    Only ``name`` is required; every other attribute has a default.
    """

    # Field identifier (required).
    name: str
    # Free-text description of the field; empty by default.
    description: str = ""
    # Primary data type label; plain string, not restricted to a fixed set here.
    data_type_primary: str = "string"
    # Secondary data type label; "NA" is the default placeholder.
    data_type_secondary: str = "NA"
    # Example values for the field; empty list by default.
    examples: list[str] = []
    # How `examples` are to be used; only "guide" or "enum" validate.
    # NOTE(review): presumably "enum" restricts output to the examples while
    # "guide" treats them as hints -- confirm against the consumer.
    examples_mode: Literal["guide", "enum"] = "guide"
    # Extraction depth; only "minimal" or "full" validate.
    depth: Literal["minimal", "full"] = "minimal"
    # Free-text difficulty label; empty by default, not constrained here.
    extraction_difficulty: str = ""


class IndexerRequest(BaseModel):
    """Request to extract structured fields from title/abstract records.

    ``records`` and ``fields`` are required; all other attributes have
    defaults. No numeric bounds are declared on the integer settings in
    this model.
    """

    # Optional project identifier; None when the request is not tied to one.
    project_id: Optional[int] = None
    # Name of the model to use; defaults to "gpt-5-mini".
    model: str = "gpt-5-mini"
    # Input records as string-to-string mappings. Each is expected to have
    # ID, Title and Abstract keys; the annotation itself does not enforce this.
    records: list[dict[str, str]]  # Each must have ID, Title, Abstract
    # Field definitions to extract (required).
    fields: list[IndexerField]
    # Prompt text overrides; empty strings by default.
    system_prompt: str = ""
    user_prompt: str = ""
    # Concurrency and batching settings; plain ints with no validation here.
    max_workers: int = 4
    batch_size: int = 50
    # Optional project context; None when not supplied.
    project_context: Optional[ProjectContext] = None
    # Run mode; only "test", "sample" or "full" validate.
    mode: Literal["test", "sample", "full"] = "full"
    # NOTE(review): presumably the record counts used for the "test" and
    # "sample" modes respectively -- confirm against the job runner.
    test_size: int = 5
    sample_size: int = 20


class IndexerResultRecord(BaseModel):
    """Fixed fields of a single per-record result."""

    # Record identifier (required).
    ID: str
    # Status label for this record (required); not restricted to a fixed set.
    indexing_status: str
    # Error message for this record; None when absent.
    extraction_error: Optional[str] = None
    # Additional extracted fields are dynamic.
    # NOTE(review): no `extra` configuration is set on this class, so whether
    # undeclared keys are retained depends on BaseModel's default handling
    # (Pydantic ignores extras by default) -- confirm this is intended.


class IndexerResult(BaseModel):
    """Aggregate result payload: result rows, errors and usage information."""

    # Result rows as free-form dicts (required); values are not type-checked.
    results: list[dict[str, Any]]
    # Error messages; empty list by default.
    errors: list[str] = []
    # Free-form usage information; empty dict by default.
    usage: dict[str, Any] = {}
    # Version string of the model; None when unknown.
    # NOTE(review): names prefixed with `model_` can trigger a protected
    # namespace warning on some Pydantic v2 releases -- confirm against the
    # installed version.
    model_version: Optional[str] = None


# ── Extraction models ──


class EvidenceSpan(BaseModel):
    """A piece of evidence text and the record section it is attributed to."""

    # Evidence text (required).
    text: str
    # Section label; only "title" or "abstract" validate, default "abstract".
    section: Literal["title", "abstract"] = "abstract"


class FieldExtraction(BaseModel):
    """Extraction output for one field; every attribute has a default."""

    # Extracted value; untyped (string | number | bool | list | None).
    value: Any = None  # string | number | bool | list | None
    # Confidence score; plain float, no range is enforced by this model.
    confidence: float = 0.0
    # Supporting evidence spans; empty list by default.
    evidence: list[EvidenceSpan] = []
    # Free-text reasoning; empty by default.
    reasoning: str = ""
    # Normalised form of the value as a string; empty by default.
    normalised_value: str = ""


# ── Cost estimation ──


class CostEstimateRequest(BaseModel):
    """Input for a cost estimate: model name, field definitions, record count."""

    # Name of the model to estimate for; defaults to "gpt-5-mini".
    model: str = "gpt-5-mini"
    # Field definitions to be extracted (required).
    fields: list[IndexerField]
    # Number of records (required); no lower bound is enforced here.
    record_count: int


class CostEstimateResponse(BaseModel):
    """Cost estimate: token counts, a USD figure and a default disclaimer."""

    # Estimated token counts (required).
    estimated_input_tokens: int
    estimated_output_tokens: int
    # Estimated cost in US dollars (required).
    estimated_cost_usd: float
    # Label for the estimate's confidence; defaults to "approximate".
    confidence: str = "approximate"
    # Disclaimer text returned with the estimate unless overridden.
    disclaimer: str = (
        "Estimate based on empirical averages. Actual cost may vary +-30% "
        "depending on abstract length and field complexity."
    )


# ── Job responses ──


class IndexerJobResponse(BaseModel):
    """Minimal job descriptor: identifier, status and progress."""

    # Job identifier (required).
    job_id: str
    # Status label; defaults to "pending" and is not restricted to a fixed set.
    status: str = "pending"
    # Progress value; defaults to 0.0.
    # NOTE(review): scale (0-1 vs 0-100) is not visible here -- confirm.
    progress: float = 0.0


class IndexerJobStatusResponse(BaseModel):
    """Detailed job status.

    Only ``job_id`` and ``status`` are required; every other attribute is
    optional and defaults to None (``progress`` defaults to 0.0).
    """

    # Job identifier (required).
    job_id: str
    # Status label (required); not restricted to a fixed set here.
    status: str
    # Progress value; scale is not defined in this model.
    progress: float = 0.0
    # Result rows available so far, as free-form dicts; None when absent.
    partial_results: Optional[list[dict[str, Any]]] = None
    # List of error messages; None when absent.
    errors: Optional[list[str]] = None
    # Free-form usage information; None when absent.
    usage: Optional[dict[str, Any]] = None
    # Single error message, separate from the `errors` list.
    # NOTE(review): presumably a job-level failure -- confirm with the producer.
    error: Optional[str] = None
    # Free-form configuration dict; None when absent.
    config: Optional[dict[str, Any]] = None
    # Version string of the model; None when unknown.
    # NOTE(review): the `model_` prefix can trigger a protected namespace
    # warning on some Pydantic v2 releases -- confirm against the installed
    # version.
    model_version: Optional[str] = None
    # Duration as an integer; presumably milliseconds, per the name.
    duration_ms: Optional[int] = None
    # Estimated cost in US dollars; None when not computed.
    estimated_cost_usd: Optional[float] = None
    # Timestamps carried as strings; the format is not defined in this model.
    created_at: Optional[str] = None
    completed_at: Optional[str] = None
    # Error classification label and retry hint; None when absent.
    error_category: Optional[str] = None
    error_retryable: Optional[bool] = None


class IndexerJobListItem(BaseModel):
    """Summary item for job listing.

    ``job_id``, ``status`` and ``progress`` are required; note that
    ``progress`` has no default in this model.
    """

    # Job identifier (required).
    job_id: str
    # Status label (required); not restricted to a fixed set here.
    status: str
    # Progress value (required); scale is not defined in this model.
    progress: float
    # Model name; empty string when not supplied.
    model: str = ""
    # Number of records; defaults to 0.
    record_count: int = 0
    # Duration as an integer; presumably milliseconds, per the name.
    duration_ms: Optional[int] = None
    # Estimated cost in US dollars; None when not computed.
    estimated_cost_usd: Optional[float] = None
    # Timestamps carried as strings; the format is not defined in this model.
    created_at: Optional[str] = None
    completed_at: Optional[str] = None


# ── AI Field Refinement ──


class RefineFieldsRequest(BaseModel):
    """Request carrying field definitions to refine, plus optional context."""

    # Field definitions to refine (required).
    fields: list[IndexerField]
    # Optional project context; None when not supplied.
    project_context: Optional[ProjectContext] = None
    # Optional sample records as free-form dicts; None when not supplied.
    sample_records: Optional[list[dict[str, Any]]] = None


class FieldSuggestion(BaseModel):
    """One suggested change to a field list."""

    # Kind of change; only "add", "modify", "remove" or "merge" validate.
    action: Literal["add", "modify", "remove", "merge"]
    # Field definition associated with the suggestion (required).
    field: IndexerField
    # Explanation for the suggestion (required).
    rationale: str
    # Optional field names related to the action; None when absent.
    # NOTE(review): presumably the field being modified/removed and the merge
    # destination respectively -- confirm against the producer.
    original_field_name: Optional[str] = None
    target_field_name: Optional[str] = None


class RefineFieldsResponse(BaseModel):
    """Response wrapping a list of field suggestions."""

    # Suggestions (required); may be an empty list.
    suggestions: list[FieldSuggestion]


# ── Tag Grouping ──


class GroupTagsRequest(BaseModel):
    """Request to group the values of one field."""

    # Name of the field whose values are being grouped (required).
    field_name: str
    # Values to group (required).
    values: list[str]
    # Optional project context; None when not supplied.
    project_context: Optional[ProjectContext] = None
    # Optional hint for the number of groups; None when not supplied.
    num_groups_hint: Optional[int] = None


class TagGroup(BaseModel):
    """A named group of string values with an optional rationale."""

    # Group name (required).
    name: str
    # Values belonging to the group (required).
    values: list[str]
    # Free-text explanation for the grouping; empty by default.
    rationale: str = ""


class GroupTagsResponse(BaseModel):
    """Response carrying tag groups and optional usage information."""

    # Resulting groups (required).
    groups: list[TagGroup]
    # Free-form usage information; None when absent.
    usage: Optional[dict[str, Any]] = None


class SaveTagGroupsRequest(BaseModel):
    """Request to save tag groups."""

    # Mapping from a string key to a list of groups (required).
    # NOTE(review): keys are presumably field names -- confirm with the caller.
    tag_groups: dict[str, list[TagGroup]]


# ── Field Suggestion ──


class SuggestFieldsRequest(BaseModel):
    """Request for field suggestions; every attribute has a default."""

    # Optional project context; None when not supplied.
    project_context: Optional[ProjectContext] = None
    # Optional free-form dict; key and value types are unconstrained here.
    # NOTE(review): presumably a PICO breakdown, per the name -- schema not
    # visible in this module.
    pico: Optional[dict] = None
    # Optional sample records as free-form dicts; None when not supplied.
    sample_records: Optional[list[dict[str, Any]]] = None
    # Optional list of already-defined field names; None when not supplied.
    existing_fields: Optional[list[str]] = None
    # Model name; defaults to "gpt-4.1" (differs from the "gpt-5-mini"
    # default used by IndexerRequest and CostEstimateRequest).
    model: str = "gpt-4.1"
    # Flag defaulting to False.
    # NOTE(review): presumably returns canned output instead of calling the
    # model -- confirm against the handler.
    mock: bool = False


class ExtractionWarning(BaseModel):
    """Warning attached to a named field."""

    # Name of the field the warning refers to (required).
    field: str
    # Risk label; defaults to "medium" and is not restricted to a fixed set.
    risk_level: str = "medium"
    # Free-text reason; empty by default.
    reason: str = ""
    # Free-text fallback suggestion; empty by default.
    suggested_fallback: str = ""


class SuggestFieldsResponse(BaseModel):
    """Response carrying suggested fields and any extraction warnings."""

    # Suggested field definitions (required).
    fields: list[IndexerField]
    # Extraction warnings; empty list by default.
    warnings: list[ExtractionWarning] = []
