"""Field refinement service for the AutoIndexer."""

from __future__ import annotations

import json
import logging
from typing import Any

from openai import OpenAI

from crystallise.openai_resources.vector_stores import normalize_chat_completion_kwargs
from crystallise.prompts.indexer import REFINEMENT_SYSTEM_PROMPT as SYSTEM_PROMPT

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Model identifier sent as the "model" argument of the chat-completion request
# issued by refine_fields().
REFINEMENT_MODEL = "gpt-4.1"


# Actions a suggestion may carry; anything else returned by the model is dropped.
_VALID_ACTIONS = ("add", "modify", "remove", "merge")


def _build_user_message(req: Any) -> str:
    """Assemble the user prompt from the fields, project context and samples."""
    fields_data = [f.model_dump() for f in req.fields]
    parts = [f"Here are my proposed extraction fields:\n{json.dumps(fields_data, indent=2)}"]

    context = req.project_context
    if context:
        if context.description:
            parts.append(f"\nProject: {context.description}")
        if context.research_questions:
            parts.append(f"Research questions: {'; '.join(context.research_questions)}")

    if req.sample_records:
        # Only the first three records are sent, to keep the prompt small.
        sample_text = json.dumps(req.sample_records[:3], indent=2, default=str)
        parts.append(f"\nSample records (first 3):\n{sample_text}")

    return "\n".join(parts)


def _suggestion_items(data: Any) -> list[Any]:
    """Normalise parsed LLM output to a list of candidate suggestion items."""
    # The model may answer with a bare list or wrap it as {"suggestions": [...]}.
    if isinstance(data, dict):
        data = data.get("suggestions")
    # Any other shape (including a non-list "suggestions" value) yields nothing.
    return data if isinstance(data, list) else []


def refine_fields(*, client: OpenAI, req: Any) -> Any:
    """Call LLM to review and suggest improvements to field definitions.

    Args:
        client: OpenAI client used to issue the chat-completion request.
        req: Request object exposing ``fields`` (items with ``model_dump()``),
            ``project_context`` (optional, with ``description`` and
            ``research_questions``) and ``sample_records``.

    Returns:
        A ``RefineFieldsResponse`` whose ``suggestions`` holds one
        ``FieldSuggestion`` per well-formed item in the model's reply. The
        list is empty when the reply cannot be parsed or contains no usable
        items. Malformed individual items are skipped rather than failing
        the whole call.
    """
    # Kept as function-scope imports, as in the original code.
    from api.schemas.indexer import FieldSuggestion, IndexerField, RefineFieldsResponse
    from crystallise.common.json_utils import LLMParseError, parse_llm_json

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": _build_user_message(req)},
    ]

    completion_kwargs = normalize_chat_completion_kwargs(
        {
            "model": REFINEMENT_MODEL,
            "messages": messages,
            "max_completion_tokens": 4096,
            "temperature": 0.3,
        }
    )
    resp = client.chat.completions.create(**completion_kwargs)

    # Treat a missing choice or empty content as "no suggestions".
    choices = resp.choices
    content = (choices[0].message.content if choices else None) or "[]"

    try:
        data = parse_llm_json(content)
    except LLMParseError:
        logger.warning("Failed to parse refinement response: %s", content[:200])
        return RefineFieldsResponse(suggestions=[])

    suggestions = []
    for item in _suggestion_items(data):
        if not isinstance(item, dict):
            continue

        action = item.get("action", "modify")
        if action not in _VALID_ACTIONS:
            continue

        # "field" comes straight from the LLM and may be null or a non-dict.
        field_data = item.get("field")
        if not isinstance(field_data, dict) or not field_data.get("name"):
            continue

        try:
            suggestion = FieldSuggestion(
                action=action,
                field=IndexerField(
                    name=field_data.get("name", ""),
                    description=field_data.get("description", ""),
                    data_type_primary=field_data.get("data_type_primary", "string"),
                    data_type_secondary=field_data.get("data_type_secondary", "NA"),
                    examples=field_data.get("examples", []),
                ),
                rationale=item.get("rationale", ""),
                original_field_name=item.get("original_field_name"),
                target_field_name=item.get("target_field_name"),
            )
        except (TypeError, ValueError) as exc:
            # NOTE(review): the schema classes are presumably pydantic models,
            # whose ValidationError subclasses ValueError — confirm. One bad
            # item should not discard the remaining valid suggestions.
            logger.warning(
                "Skipping invalid refinement suggestion for field %r: %s",
                field_data.get("name"),
                exc,
            )
            continue

        suggestions.append(suggestion)

    return RefineFieldsResponse(suggestions=suggestions)
