"""Build function-calling schemas for AutoIndexer extraction."""

from __future__ import annotations

import copy
from typing import Any


# JSON-Schema fragment describing a list of supporting quotes.
# Each item is an object with exactly two keys, both required:
#   - ``text``: the quoted string
#   - ``section``: which part of the input the quote came from
# ``additionalProperties`` is False, so items may not carry any other keys.
_EVIDENCE_SCHEMA: dict[str, Any] = {
    "type": "array",
    "description": "Exact supporting quotes from the input text",
    "items": {
        "type": "object",
        "properties": {
            "text": {
                "type": "string",
                "description": "Exact quote from title or abstract",
            },
            "section": {
                "type": "string",
                # Closed set: a quote can only be attributed to one of these.
                "enum": ["title", "abstract"],
                "description": "Where the evidence was found",
            },
        },
        "required": ["text", "section"],
        "additionalProperties": False,
    },
}


def _build_value_schema(field: dict[str, Any]) -> dict[str, Any]:
    """Build the ``value`` property schema for a single field, preserving type constraints."""
    primary = field.get("data_type_primary", "string")
    secondary = field.get("data_type_secondary", "NA")
    # Normalize compound types: "array-string" → primary="array", secondary="string"
    if primary.startswith("array-"):
        secondary = primary.split("-", 1)[1]
        primary = "array"
    examples = field.get("examples", [])
    examples_mode = field.get("examples_mode", "guide")

    if primary == "array":
        item_type = secondary if secondary and secondary != "NA" else "string"
        item_schema: dict[str, Any] = {"type": item_type}
        if examples and examples_mode == "enum":
            item_schema["enum"] = examples
        schema: dict[str, Any] = {"type": "array", "items": item_schema}
    else:
        schema = {"type": primary or "string"}
        if examples and examples_mode == "enum":
            schema["enum"] = examples

    # Allow null — use anyOf for strict-mode compatibility
    schema = {"anyOf": [schema, {"type": "null"}]}

    return schema


def create_function_schema(
    name: str,
    description: str,
    fields: list[dict[str, Any]],
) -> dict[str, Any]:
    """Return a strict function schema with one wrapper object per field.

    Each entry of *fields* becomes a property (keyed by the field's ``name``,
    or ``"field"`` when no name is given) whose value is an object with:

    * ``value`` -- the typed, nullable schema from ``_build_value_schema``
    * ``confidence`` -- a number
    * ``evidence`` -- a list of supporting quotes
    * ``reasoning`` and ``normalised_value`` -- strings, added only when the
      field's ``depth`` is ``"full"``

    Every property of every object is listed in ``required`` and
    ``additionalProperties`` is False throughout.

    Args:
        name: Function name placed in the schema.
        description: Function description placed in the schema.
        fields: Field specifications; see ``_build_value_schema`` for the
            type-related keys. If two fields share a name, the later one
            replaces the earlier one.

    Returns:
        A dict with ``name``, ``description``, ``strict`` and ``parameters``.
    """
    properties: dict[str, Any] = {}

    for field in fields:
        field_name = field.get("name", "field")

        wrapper_props: dict[str, Any] = {
            "value": _build_value_schema(field),
            "confidence": {
                "type": "number",
                "description": "Confidence 0-1. 0.9+ explicit, 0.7-0.9 implied, <0.7 inferred.",
            },
            # Deep copy: sharing the module-level dict would let any in-place
            # edit of one returned schema alter the constant and every other
            # schema built from it.
            "evidence": copy.deepcopy(_EVIDENCE_SCHEMA),
        }

        if field.get("depth", "minimal") == "full":
            wrapper_props["reasoning"] = {
                "type": "string",
                "description": "One-sentence explanation of how the value was derived",
            }
            wrapper_props["normalised_value"] = {
                "type": "string",
                "description": "Standardised value (e.g., RCT instead of full phrase)",
            }

        properties[field_name] = {
            "type": "object",
            "properties": wrapper_props,
            # Every property is required; insertion order gives the same
            # ordering as listing them by hand.
            "required": list(wrapper_props),
            "additionalProperties": False,
        }

    return {
        "name": name,
        "description": description,
        "strict": True,
        "parameters": {
            "type": "object",
            "properties": properties,
            # Derived from the property keys so a repeated field name cannot
            # produce duplicate entries, which JSON Schema forbids in
            # ``required``.
            "required": list(properties),
            "additionalProperties": False,
        },
    }


def validate_input_dataframe(df) -> tuple[bool, list[str]]:
    """Check that *df* has usable ``ID``, ``Title`` and ``Abstract`` columns.

    Missing columns are reported first; only when all three are present is
    each one checked for being null in every row.

    Returns:
        ``(ok, problems)`` where ``ok`` is True exactly when ``problems`` is
        empty.
    """
    expected = ("ID", "Title", "Abstract")

    absent = [
        f"Missing required column: `{column}`"
        for column in expected
        if column not in df.columns
    ]
    if absent:
        # Content checks would fail on a column that is not there.
        return False, absent

    blank = [
        f"Column `{column}` is entirely empty"
        for column in expected
        if df[column].isna().all()
    ]
    return not blank, blank


def validate_schema_fields(df) -> tuple[bool, list[str]]:
    """Check that the schema dataframe *df* has the columns and values needed.

    Required columns are ``name``, ``description``, ``data_type_primary`` and
    ``data_type_secondary``. When all are present, ``name`` and
    ``data_type_primary`` must additionally have no null entries.

    Returns:
        ``(ok, problems)`` where ``ok`` is True exactly when ``problems`` is
        empty.
    """
    expected = ("name", "description", "data_type_primary", "data_type_secondary")

    problems = [
        f"Missing required column: `{column}`"
        for column in expected
        if column not in df.columns
    ]
    if problems:
        # Null checks only make sense once every column exists.
        return False, problems

    null_checks = (
        ("name", "Schema contains rows with missing field names"),
        ("data_type_primary", "Schema contains rows with missing primary data types"),
    )
    for column, message in null_checks:
        if df[column].isna().any():
            problems.append(message)

    return not problems, problems