"""AutoIndexer pipeline: extract structured fields from title/abstract via function calling."""

from __future__ import annotations

import json
import logging
import os
from typing import Any, Optional

from openai import AsyncOpenAI

from crystallise.llm.retry import RetryConfig, async_retry_with_backoff
from crystallise.llm.errors import classify_openai_error
from crystallise.openai_resources.vector_stores import normalize_chat_completion_kwargs

logger = logging.getLogger(__name__)

from crystallise.prompts.indexer import PIPELINE_SYSTEM_PROMPT as DEFAULT_SYSTEM_PROMPT  # noqa: F401, E402
from crystallise.prompts.indexer import PIPELINE_USER_PROMPT as DEFAULT_USER_PROMPT  # noqa: F401, E402

# Name of the function tool the model is forced to call (via `tool_choice`);
# also the default `tool_name` for `submit_chat_completion`.
TOOL_NAME = "extract_structured_data"


async def submit_chat_completion(
    *,
    tools: list[dict],
    system_message: str,
    user_message: str,
    title: str,
    abstract: str,
    model: str = "gpt-5-mini",
    api_key: str | None = None,
    retry_config: RetryConfig | None = None,
    tool_name: str = TOOL_NAME,
) -> tuple[dict[str, Any] | None, dict[str, Any], Optional[str]]:
    """
    Submit a single function-calling Chat Completion for field extraction.

    The request forces the model to call ``tool_name`` and is executed through
    the shared async retry logic from llm/retry.py. Exceptions raised while
    making the call or reading the response are caught and reported through
    the returned error string instead of being raised.

    Args:
        tools: Tool (function) definitions forwarded to the Chat Completions API.
        system_message: Content of the system message.
        user_message: Instruction text; the title and abstract are appended to it.
        title: Record title inserted into the user message.
        abstract: Record abstract inserted into the user message.
        model: Model name sent with the request.
        api_key: Explicit API key. Falls back to the ``OPENAI_API_KEY`` and then
            ``CRYSTALLISE_OPENAI_API_KEY`` environment variables.
        retry_config: Retry settings; a default ``RetryConfig()`` is used when omitted.
        tool_name: Name of the function whose arguments are extracted.

    Returns: (result_dict | None, usage_dict, error_str | None)
        On success ``result_dict`` is the decoded tool-call arguments (always a
        dict) and ``error_str`` is None. On failure ``result_dict`` is None and
        ``error_str`` starts with one of ``AUTH``, ``NO_TOOL_CALLS``,
        ``NO_MATCHING_TOOL``, ``JSON_PARSE_ERROR`` or the upper-cased category
        returned by ``classify_openai_error``. ``usage_dict`` is empty when no
        response was received.
    """
    import openai

    key = api_key or os.environ.get("OPENAI_API_KEY") or os.environ.get("CRYSTALLISE_OPENAI_API_KEY")
    if not key:
        return None, {}, "AUTH: OPENAI_API_KEY not set"

    cfg = retry_config or RetryConfig()
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": f"{user_message} Title: {title}. Abstract: {abstract}"},
    ]

    async def _call():
        # Each invocation opens its own client and closes it on exit.
        async with AsyncOpenAI(api_key=key) as client:
            completion_kwargs = normalize_chat_completion_kwargs(
                {
                    "model": model,
                    "messages": messages,
                    "tools": tools,
                    # Force the model to call the extraction function.
                    "tool_choice": {"type": "function", "function": {"name": tool_name}},
                    "max_completion_tokens": 4096,
                    "temperature": 0.0,
                    "timeout": 30,
                }
            )
            return await client.chat.completions.create(**completion_kwargs)

    usage_out: dict[str, Any] = {}
    try:
        resp = await async_retry_with_backoff(_call, cfg)

        usage = resp.usage
        usage_out = {
            "prompt_tokens": getattr(usage, "prompt_tokens", 0),
            "completion_tokens": getattr(usage, "completion_tokens", 0),
            "total_tokens": getattr(usage, "total_tokens", 0),
        }

        # A response without choices carries no tool calls; report it as such
        # rather than letting an IndexError reach the generic handler below.
        choices = resp.choices
        if not choices:
            return None, usage_out, "NO_TOOL_CALLS: Model did not return tool calls"

        tool_calls = getattr(choices[0].message, "tool_calls", None)
        if not tool_calls:
            return None, usage_out, "NO_TOOL_CALLS: Model did not return tool calls"

        for tc in tool_calls:
            fn = getattr(tc, "function", None)
            if not fn or getattr(fn, "name", None) != tool_name:
                continue
            raw_args = getattr(fn, "arguments", "{}")
            try:
                parsed = json.loads(raw_args)
            except (TypeError, ValueError, RecursionError) as e:
                # ValueError covers json.JSONDecodeError; TypeError covers
                # arguments that are not str/bytes (e.g. None).
                return None, usage_out, f"JSON_PARSE_ERROR: {str(e)}"
            # Arguments are model-generated: valid JSON that is not an object
            # (list, string, null, ...) is unusable as a field mapping.
            if not isinstance(parsed, dict):
                return (
                    None,
                    usage_out,
                    f"JSON_PARSE_ERROR: Expected a JSON object for tool arguments, got {type(parsed).__name__}",
                )
            return parsed, usage_out, None
        return None, usage_out, f"NO_MATCHING_TOOL: {tool_name} not found in response"

    except openai.AuthenticationError:
        return None, usage_out, "AUTH: Invalid API key or authentication failed"
    except Exception as e:
        category = classify_openai_error(e)
        return None, usage_out, f"{category.value.upper()}: {type(e).__name__} - {str(e)}"


def validate_extractions(fields: dict[str, Any]) -> dict[str, Any]:
    """Apply server-side confidence validation rules to extraction results.

    Rules, applied in place to every dict-valued entry of ``fields``:
    - null value → confidence forced to 0.0 and evidence reset to ``[]``
    - non-null value with no evidence → confidence capped at 0.5
      (a missing or non-numeric confidence counts as 0.0)

    Entries that are not dicts are left untouched. If ``fields`` itself is not
    a dict it is returned unchanged instead of raising.

    Args:
        fields: Mapping of field name to an extraction dict. The keys read
            from each extraction are ``"value"``, ``"confidence"`` and
            ``"evidence"``.

    Returns:
        The same ``fields`` object that was passed in (mutated in place).
    """
    # Tolerate a payload of the wrong shape: there is nothing to validate.
    if not isinstance(fields, dict):
        return fields

    for extraction in fields.values():
        if not isinstance(extraction, dict):
            continue
        if extraction.get("value") is None:
            extraction["confidence"] = 0.0
            extraction["evidence"] = []
        elif not extraction.get("evidence"):
            confidence = extraction.get("confidence", 0.0)
            # A null or string confidence cannot be compared with 0.5;
            # treat it as "no confidence" rather than raising TypeError.
            if not isinstance(confidence, (int, float)):
                confidence = 0.0
            extraction["confidence"] = min(confidence, 0.5)
    return fields


async def process_record(
    *,
    record: dict[str, str],
    tools: list[dict],
    system_message: str,
    user_message: str,
    model: str = "gpt-5-mini",
    api_key: str | None = None,
) -> tuple[dict[str, Any] | None, dict[str, Any] | None, Optional[str]]:
    """
    Run one record through the indexer and wrap the outcome in a status dict.

    The record's "Title" and "Abstract" entries (empty string when absent) are
    sent to ``submit_chat_completion``; its "ID" entry is copied into the
    returned dict.

    Returns: (result, usage, error)
        - extraction succeeded: ``result`` has "ID", ``indexing_status``
          "complete" and the validated extraction under "fields".
        - extraction failed with an error message: ``result`` has "ID",
          ``indexing_status`` "error" and the message under "extraction_error".
        - neither a result nor an error message: ``result`` is None.
        ``usage`` and ``error`` are passed through from ``submit_chat_completion``.
    """
    extracted, usage, error = await submit_chat_completion(
        tools=tools,
        system_message=system_message,
        user_message=user_message,
        title=record.get("Title", ""),
        abstract=record.get("Abstract", ""),
        model=model,
        api_key=api_key,
        tool_name=TOOL_NAME,
    )

    # Success path: validate in place, then wrap.
    if extracted is not None:
        validate_extractions(extracted)
        completed = {
            "ID": record.get("ID", ""),
            "indexing_status": "complete",
            "fields": extracted,
        }
        return completed, usage, error

    # Failure path: only build an error record when there is a message to report.
    if error:
        failed = {
            "ID": record.get("ID", ""),
            "indexing_status": "error",
            "extraction_error": error,
        }
        return failed, usage, error

    return None, usage, error