"""
Centralised LLM System Message Templates for the screening pipeline.

Generates system messages (prompts) for each stage of the AI screening pipeline.
System messages define the LLM's role, context, and expected output format.
"""

import random


# ---------------------------------------------------------------------------
# Formatting helpers (copied from screening/formatting.py to avoid circular
# import: prompts.screening → screening.formatting → screening.__init__ →
# screening.pipeline → screening.labelling → screening.sysmsg → prompts.screening)
# ---------------------------------------------------------------------------


def format_questions(question_list: list[str]) -> str:
    """
    Render research questions as a numbered list in randomised order.

    The questions are re-ordered with ``random.sample`` on every call, so the
    numbering follows the shuffled order rather than the caller's order. The
    caller's list is copied first and is never mutated.

    Args:
        question_list: Research question strings; a falsy value (empty list,
            None) yields a placeholder line instead.

    Returns:
        The questions numbered from 1 and separated by blank lines, or
        "(No research questions provided)" when there is nothing to render.
    """
    if question_list:
        shuffled = random.sample(list(question_list), len(question_list))
        numbered = (f"{position}. {question}" for position, question in enumerate(shuffled, start=1))
        return "\n\n".join(numbered)
    return "(No research questions provided)"


def format_inc_exc(inc_exc: dict[str, dict[str, list[str]]]) -> str:
    """
    Render inclusion/exclusion criteria as headed bullet lists, one section per category.

    Categories are emitted in a random order (``random.sample`` over the keys);
    the items inside each "include"/"exclude" list keep their given order.
    A category only gets an "Include:" or "Exclude:" heading when the
    corresponding key is present in its dict.

    Args:
        inc_exc: Mapping of category name to a dict that may hold "include"
            and/or "exclude" lists of criteria.

    Returns:
        The formatted criteria text with surrounding whitespace stripped
        (an empty string when there are no categories).
    """
    pieces: list[str] = []
    for category in random.sample(list(inc_exc.keys()), len(inc_exc.keys())):
        rules = inc_exc[category]
        pieces.append(f"# {category}:\n")
        # "include" is always rendered before "exclude" within a category.
        for field, heading in (("include", "\nInclude:\n"), ("exclude", "\nExclude:\n")):
            if field in rules:
                pieces.append(heading)
                pieces.extend(f"- {str(entry)}\n" for entry in rules[field])
        pieces.append("\n\n")
    return "".join(pieces).strip()


def format_clusters(cluster_list: list[dict[str, str]]) -> str:
    """
    Render clusters as numbered name/description records for use in a prompt.

    Each cluster is numbered from 1 in list order. Missing "cluster_name" or
    "cluster_description" keys render as empty text, and non-string values are
    converted with ``str``.

    Args:
        cluster_list: Cluster dicts read via the "cluster_name" and
            "cluster_description" keys.

    Returns:
        The concatenated records, each terminated by four newlines
        (an empty string for an empty list).
    """
    fragments: list[str] = []
    for number, cluster in enumerate(cluster_list, start=1):
        label = cluster.get("cluster_name", "")
        if not isinstance(label, str):
            label = str(label)
        summary = cluster.get("cluster_description", "")
        if not isinstance(summary, str):
            summary = str(summary)
        fragments.extend(
            (
                f"Cluster number:\n{number}\n",
                "Cluster name:\n",
                label,
                "\nCluster description:\n",
                summary,
                "\n\n\n\n",
            )
        )
    return "".join(fragments)


# ============================================================================
# LABELLING STAGE: Initial Include/Exclude Scoring (1-5 scale)
# ============================================================================


def make_labelling_system_message(lit_rev_qs: list[str], inc_exc: dict[str, dict[str, list[str]]]) -> str:
    """
    Build the system message for the initial include/exclude scoring stage.

    The prompt asks the LLM to judge a paper from its title, abstract,
    citation and year, to err on the side of inclusion when in doubt, and to
    answer with an integer from 1 (definitely exclude) to 5 (definitely
    include). The research questions and criteria are rendered with
    ``format_questions`` and ``format_inc_exc``, both of which randomise
    ordering, so repeated calls can produce differently ordered prompts.

    Args:
        lit_rev_qs: List of research question strings
        inc_exc: Nested dict of inclusion/exclusion criteria by category

    Returns:
        Formatted system message string
    """
    # Render questions first, then criteria - the same order in which they
    # are interpolated into the prompt text.
    questions_block = format_questions(lit_rev_qs)
    criteria_block = format_inc_exc(inc_exc)

    return f"""You are a medical paper literature review assistant.

Given a title, an abstract, citation, and year, your goal is to make a judgement as to whether a paper should be included or excluded from a literature review.
Make your judgement based on whether the title or abstract makes it seem like the full text of the paper would be relevant to the literature review.
If in doubt, err on the side of inclusion.
If the Abstract of a paper is not given, then you are unsure and should output as such.

The literature review is looking for studies that have data on the following questions:

{questions_block}

This has lead to a set of inclusion and exclusion criteria for studies:

{criteria_block}

Make a decision as to whether the provided paper should be included or excluded from the literature review.

Finally, output an integer between 1-5 with the following meanings:

1 - Definitely exclude
2 - Probably exclude
3 - Unsure
4 - Probably include
5 - Definitely include"""


# ============================================================================
# REASONING STAGE: Explain Why Scores Were Assigned
# ============================================================================


def make_rating_reasoning_system_message(lit_rev_qs: list[str], inc_exc: dict[str, dict[str, list[str]]]) -> str:
    """
    Build the system message for the score-explanation (reasoning) stage.

    The prompt asks the LLM to explain, in under 50 words and without giving
    its own include/exclude judgement, why the scoring assistant assigned the
    given average score. It also requests 2-3 evidence spans copied verbatim
    from the paper's title or abstract, each tagged with section, criterion
    category and the decision it supports.

    Args:
        lit_rev_qs: List of research question strings
        inc_exc: Nested dict of inclusion/exclusion criteria by category

    Returns:
        Formatted system message string
    """
    # Render questions first, then criteria - the same order in which they
    # are interpolated into the prompt text.
    questions_block = format_questions(lit_rev_qs)
    criteria_block = format_inc_exc(inc_exc)

    # Doubled braces in the evidence examples are f-string escapes that emit
    # literal JSON braces.
    return f"""You are a medical paper literature review explanation agent.

Your inputs are the details of a medical paper and an average predicted score from an AI assistant for whether that paper should be included in a literature review.
Your goal is to explain why the assistant gave that score and on what criteria they seemed to make their decision.
The details of the inclusion and exclusion criteria are given below, as well as the original scoring criteria.

The literature review is looking for studies that have data on the following questions:

{questions_block}

This has lead to a set of inclusion and exclusion criteria for studies:

{criteria_block}

Scoring criteria:
1 - Definitely exclude
2 - Probably exclude
3 - Unsure
4 - Probably include
5 - Definitely include

Your reasoning should not include a judgement of whether you think the paper should be included or excluded, but simply be a quick explanation of why the original assistant gave the score they did.
Your reasoning should be less than 50 words.

Additionally, provide 2-3 evidence spans that support your reasoning.

Evidence rules:
- Each evidence span MUST be a verbatim substring copied directly from the paper's title or abstract text. Copy the exact words as they appear.
- NEVER use criterion definitions, category names, or your own words as evidence text. The text field must match a passage in the paper.
- For each span, specify the section ("title" or "abstract"), which criterion category it relates to (use the exact category name from the criteria above), and whether it supports inclusion ("include") or exclusion ("exclude").
- Good example: {{"text": "randomized controlled trial of 150 patients", "section": "abstract", "criterion": "Study Design", "supports": "include"}}
- Bad example (DO NOT DO THIS): {{"text": "Clinical trials", "section": "abstract", "criterion": "Study Design", "supports": "include"}} — this copies the criterion value, not paper text.
- Prioritize the most decisive evidence first.
- If the abstract is missing or very short, provide fewer spans."""


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================


def get_opposite_decision_type(decision_type: str) -> str:
    """
    Toggle between 'included' and 'excluded' decision types.

    Args:
        decision_type: Either 'included' or 'excluded'

    Returns:
        The opposite decision type

    Raises:
        ValueError: If decision_type is not 'included' or 'excluded'
    """
    if decision_type == "included":
        return "excluded"
    if decision_type == "excluded":
        return "included"
    # Anything else (including the bare forms 'include'/'exclude') is rejected
    # rather than guessed at.
    raise ValueError(f"Unsupported decision_type: {decision_type}")


# ============================================================================
# CLUSTERING STAGE: Group Similar Reasons into Themes
# ============================================================================


def make_clustering_system_message(
    incl_excl_criteria_names_list: list[str], decision_type: str, max_clusters: int
) -> str:
    """
    Build the system message for clustering per-paper reasoning texts.

    The prompt asks the LLM to summarise between 1 and ``max_clusters``
    distinct clusters of reasons for the given decision, each with a name,
    a short description and one of the supplied criteria names.

    Args:
        incl_excl_criteria_names_list: List of criteria category names
        decision_type: 'included' or 'excluded' (determines prompt framing);
            a value not ending in 'd' has 'd' appended, so 'include' and
            'exclude' are accepted too
        max_clusters: Maximum number of clusters to create

    Returns:
        Formatted system message string

    Raises:
        ValueError: If the normalised decision type is neither 'included'
            nor 'excluded' (raised by ``get_opposite_decision_type``)
    """
    # include -> included, exclude -> excluded
    decision = decision_type if decision_type.endswith("d") else decision_type + "d"
    opposite = get_opposite_decision_type(decision)
    criteria_names = ", ".join(incl_excl_criteria_names_list)
    # Papers are excluded "from" a review but included "in" one.
    if decision == "excluded":
        preposition = "from"
    else:
        preposition = "in"

    return f"""You are a medical literature review explanation summarization agent.

You will be given a list of reasons why various papers were {decision} for a literature review.
Your job is to summarize at least 1 and up to {max_clusters} common clusters of reasons why these papers were {decision} {preposition} the literature review.
Ignore any mentions of reasons why a paper was {opposite} if given.
Try and make the reasons as comprehensive as possible to cover as many papers as possible.
Make the reasons distinct - do not duplicate the same reason.

Characterise each decision reason cluster with a name, a short description of the reason, and the criteria on which the decision was based.

The criteria should be one of: {criteria_names}"""


# ============================================================================
# CLUSTER SELECTION STAGE: Assign Papers to Clusters
# ============================================================================


def make_cluster_selection_system_message(cluster_list: list[dict[str, str]], decision_type: str) -> str:
    """
    Build the system message for assigning a paper's reasoning to clusters.

    The prompt lists the candidate clusters (rendered by ``format_clusters``)
    and asks the LLM to output only the number(s) of the clusters present in
    a single paper's reasoning, or an empty list when none apply.

    Args:
        cluster_list: List of cluster dicts, rendered into the prompt by
            ``format_clusters``
        decision_type: 'included' or 'excluded' (determines prompt framing);
            a value not ending in 'd' has 'd' appended, so 'include' and
            'exclude' are accepted too

    Returns:
        Formatted system message string

    Raises:
        ValueError: If the normalised decision type is neither 'included'
            nor 'excluded' (raised by ``get_opposite_decision_type``)
    """
    # include -> included, exclude -> excluded
    decision = decision_type if decision_type.endswith("d") else decision_type + "d"
    opposite = get_opposite_decision_type(decision)
    # Papers are excluded "from" a review but included "in" one.
    if decision == "excluded":
        preposition = "from"
    else:
        preposition = "in"
    clusters_block = format_clusters(cluster_list)

    return f"""You are a reasoning classification agent.

Given a reason why a single paper was {decision} {preposition} a literature review and a set of candidate clusters of general reasons, select any clusters that are included in that single paper reason.
Ignore any mentions of reasons why a paper was {opposite} if given.

Choose from the following clusters for reasons why the paper was {decision}:

{clusters_block}

Only output a list of the number(s) of the relevant cluster(s).
If none of the above clusters are relevant to the single paper reasoning, simply output an empty list."""


# ============================================================================
# CRITERIA GENERATION & REFINEMENT: Generate, refine, and invert eligibility criteria
# ============================================================================


def make_exclusion_generation_prompt(project_description: str, research_questions: list) -> str:
    """
    Build the system message for drafting initial exclusion criteria.

    The prompt supplies the project description and research questions and
    asks for 5-10 exclusion criteria assessable from title and abstract alone,
    each reported with a category, criterion text and rationale.

    Args:
        project_description: Description of the systematic review project
        research_questions: List of research question strings; when falsy a
            "No research questions provided." placeholder is used instead

    Returns:
        Formatted system message string
    """
    if research_questions:
        questions_block = format_questions(research_questions)
    else:
        questions_block = "No research questions provided."

    return f"""You are a systematic review methodology expert specializing in eligibility criteria design.

Your task is to generate exclusion criteria for title/abstract screening based on the project context below.

PROJECT DESCRIPTION:
{project_description}

RESEARCH QUESTIONS:
{questions_block}

INSTRUCTIONS:
1. Focus on exclusion criteria that can be assessed from title and abstract alone
2. Prioritise group criteria using standard PICO(S) categories: Population, Intervention/Exposure, Comparator, Outcome, Study Design; but create other categories if project context suggests important themes that don't fit PICO(S), e.g. "Model Type" for modelling reviews.
3. Be specific and actionable - each criterion should clearly define what to exclude
4. Consider common exclusion reasons: wrong population, wrong intervention, wrong study design, wrong publication type
5. Do NOT include criteria that require full-text review

OUTPUT FORMAT:
For each exclusion criterion, provide:
- Category: The exclusion category/cluster
- Criterion: The specific exclusion criterion text
- Rationale: Brief explanation of why this exclusion is appropriate

Generate 5-10 exclusion criteria that are most relevant to this review."""


def make_inclusion_generation_prompt(project_description: str, research_questions: list) -> str:
    """
    Build the system message for drafting initial inclusion criteria.

    The prompt supplies the project description and research questions and
    asks for 5-10 inclusion criteria assessable from title and abstract alone,
    returned as a JSON array of objects with "category", "text",
    "description", "confidence" and "rationale" fields.

    Args:
        project_description: Description of the systematic review project
        research_questions: List of research question strings; when falsy a
            "No research questions provided." placeholder is used instead

    Returns:
        Formatted system message string
    """
    if research_questions:
        questions_block = format_questions(research_questions)
    else:
        questions_block = "No research questions provided."

    return f"""You are a systematic review methodology expert specializing in eligibility criteria design.

Your task is to generate inclusion criteria for title/abstract screening based on the project context below.

PROJECT DESCRIPTION:
{project_description}

RESEARCH QUESTIONS:
{questions_block}

INSTRUCTIONS:
1. Focus on inclusion criteria that can be assessed from title and abstract alone
2. Group criteria by standard PICO(S) categories: Population, Intervention/Exposure, Comparator, Outcome, Study Design
3. Be specific and actionable - each criterion should clearly define what to INCLUDE
4. Think about what makes a study relevant: correct population, relevant intervention, appropriate study design, meaningful outcomes
5. Do NOT include criteria that require full-text review

OUTPUT FORMAT:
Return a JSON array where each element has:
- "category": The PICO(S) category
- "text": The specific inclusion criterion text
- "description": Brief explanation of the criterion
- "confidence": Confidence score 0.0-1.0
- "rationale": Brief explanation of why this inclusion is appropriate

Generate 5-10 inclusion criteria that are most relevant to this review."""


def make_exclusion_refinement_prompt(
    current_criteria: list, inclusion_criteria: list, discrepancies: dict, project_description: str = ""
) -> str:
    """
    Build the system message for refining existing exclusion criteria.

    The prompt shows the current exclusion criteria, the inclusion criteria
    (as candidates for inversion) and a per-cluster summary of AI/human
    screening disagreements, then asks for ADD / MODIFY / REMOVE suggestions.
    Each empty input is replaced by an explanatory placeholder line.

    Args:
        current_criteria: List of current exclusion criteria dicts, read via
            the "criteria_name" and "criteria_value" keys
        inclusion_criteria: List of inclusion criteria dicts with the same
            keys (for inversion suggestions)
        discrepancies: Dict mapping cluster name to a dict read via the
            "ai_excluded_human_included", "ai_included_human_excluded" and
            "papers" keys; the conflict total is the length of "papers"
        project_description: Optional project description for context

    Returns:
        Formatted system message string
    """
    # Current exclusion criteria, one "- [name] value" bullet per criterion.
    if current_criteria:
        current_formatted = "".join(
            f"- [{criterion.get('criteria_name', 'Unknown')}] {criterion.get('criteria_value', '')}\n"
            for criterion in current_criteria
        )
    else:
        current_formatted = "No exclusion criteria currently defined.\n"

    # Inclusion criteria in the same bullet format, for inversion analysis.
    if inclusion_criteria:
        inclusion_formatted = "".join(
            f"- [{criterion.get('criteria_name', 'Unknown')}] {criterion.get('criteria_value', '')}\n"
            for criterion in inclusion_criteria
        )
    else:
        inclusion_formatted = "No inclusion criteria available.\n"

    # One summary bullet per discrepancy cluster.
    if discrepancies:
        discrepancy_lines = []
        for cluster_name, stats in discrepancies.items():
            ai_excluded = stats.get("ai_excluded_human_included", 0)
            ai_included = stats.get("ai_included_human_excluded", 0)
            conflict_count = len(stats.get("papers", []))
            discrepancy_lines.append(
                f"- {cluster_name}: {conflict_count} conflicts "
                f"(AI excluded/human included: {ai_excluded}, "
                f"AI included/human excluded: {ai_included})\n"
            )
        discrepancy_formatted = "".join(discrepancy_lines)
    else:
        discrepancy_formatted = "No discrepancies to analyze.\n"

    project_context = project_description or "Not provided."

    return f"""You are a systematic review methodology expert specializing in eligibility criteria refinement.

Your task is to suggest improvements to the exclusion criteria based on the analysis below.

PROJECT CONTEXT:
{project_context}

CURRENT EXCLUSION CRITERIA:
{current_formatted}

CURRENT INCLUSION CRITERIA (for inversion analysis):
{inclusion_formatted}

SCREENING DISCREPANCIES (AI vs Human disagreements by cluster):
{discrepancy_formatted}

INSTRUCTIONS:
1. Analyze inclusion criteria and suggest corresponding exclusion criteria (inversion)
   - Example: If inclusion is "Adults >=18 years", suggest exclusion "Pediatric populations (<18 years)"
2. Analyze discrepancy patterns to identify where criteria need clarification
   - High AI-excluded-but-human-included suggests criteria may be too strict
   - High AI-included-but-human-excluded suggests criteria may need additions
3. Suggest specific, actionable modifications or additions
4. Focus on criteria assessable from title/abstract only

OUTPUT FORMAT:
For each suggestion, provide:
- Action: ADD, MODIFY, or REMOVE
- Category: The PICO(S) category
- Current (if modifying): The current criterion text
- Suggested: The new or modified criterion text
- Rationale: Why this change improves screening accuracy"""


def make_reasoning_criteria_prompt(reasoning_texts: list, active_criteria_summary: str) -> str:
    """
    Build the prompt for deriving new criteria from AI reasoning texts.

    Only the first 50 reasoning texts are embedded (to stay within context);
    each is labelled "Paper N" and separated by a "---" line. The prompt asks
    for 3-7 criteria as a JSON array, avoiding duplicates of the active
    criteria.

    Args:
        reasoning_texts: List of AI reasoning strings from screened papers
        active_criteria_summary: Formatted string of current active criteria;
            when empty a "(No active criteria)" placeholder is used

    Returns:
        Formatted prompt string
    """
    # Cap the sample at 50 texts to stay within context.
    sampled_texts = reasoning_texts[:50]
    numbered_texts = [f"Paper {number}: {text}" for number, text in enumerate(sampled_texts, start=1)]
    reasoning_block = "\n---\n".join(numbered_texts)
    sample_size = len(sampled_texts)
    criteria_block = active_criteria_summary or "(No active criteria)"

    return f"""You are a systematic review methodology expert specializing in eligibility criteria design.

Your task is to analyze AI reasoning texts from screened papers and identify recurring themes
that should be captured as formal eligibility criteria.

AI REASONING TEXTS ({sample_size} papers):
{reasoning_block}

CURRENT ACTIVE CRITERIA:
{criteria_block}

INSTRUCTIONS:
1. Identify recurring themes, patterns, and reasons across the reasoning texts
2. For each theme, propose a specific, actionable criterion
3. Do NOT duplicate existing criteria listed above
4. Focus on criteria assessable from title/abstract only
5. Group criteria by PICO(S) categories: Population, Intervention/Exposure, Comparator, Outcome, Study Design

OUTPUT FORMAT:
Return a JSON array where each element has:
- "category": The PICO(S) category
- "text": The specific criterion text
- "description": Brief explanation of the criterion
- "confidence": Confidence score 0.0-1.0 based on how frequently the theme appeared
- "rationale": Explanation of which reasoning patterns led to this criterion

Generate 3-7 criteria that capture the most prominent themes."""


def make_reasoning_suggestion_prompt(reasoning_texts: list, current_criteria: list) -> str:
    """
    Build the prompt for suggesting criteria changes from reasoning analysis.

    Only the first 50 reasoning texts are embedded; each is labelled
    "Paper N" and separated by a "---" line. The prompt asks for 2-5
    prioritised ADD / MODIFY / REMOVE suggestions as a JSON array.

    Args:
        reasoning_texts: List of AI reasoning strings from screened papers
        current_criteria: List of criteria dicts with criteria_name and
            criteria_value; when empty a placeholder line is used

    Returns:
        Formatted prompt string
    """
    sampled_texts = reasoning_texts[:50]
    numbered_texts = [f"Paper {number}: {text}" for number, text in enumerate(sampled_texts, start=1)]
    reasoning_block = "\n---\n".join(numbered_texts)
    sample_size = len(sampled_texts)

    # One "- [name] value" bullet per criterion, or a placeholder when none exist.
    if current_criteria:
        criteria_formatted = "".join(
            f"- [{criterion.get('criteria_name', 'Unknown')}] {criterion.get('criteria_value', '')}\n"
            for criterion in current_criteria
        )
    else:
        criteria_formatted = "(No criteria currently defined)\n"

    return f"""You are a systematic review methodology expert specializing in eligibility criteria refinement.

Your task is to compare AI reasoning patterns against current criteria and suggest improvements.

AI REASONING TEXTS ({sample_size} papers):
{reasoning_block}

CURRENT CRITERIA:
{criteria_formatted}

INSTRUCTIONS:
1. Identify themes in reasoning that are NOT covered by any current criterion -> suggest ADD
2. Identify criteria that seem too broad (reasoning mentions many edge cases) -> suggest MODIFY
3. Identify criteria that seem too narrow (reasoning rarely references them) -> suggest MODIFY or REMOVE
4. Focus on actionable, specific suggestions
5. Prioritize suggestions that would most improve screening accuracy

OUTPUT FORMAT:
Return a JSON array where each element has:
- "action": "ADD", "MODIFY", or "REMOVE"
- "category": The PICO(S) category
- "text": The suggested new or modified criterion text
- "rationale": Explanation of which reasoning patterns led to this suggestion

Generate 2-5 suggestions, ordered by priority."""


def make_inclusion_inversion_prompt(selected_inclusion_criteria: list) -> str:
    """
    Build the system message for inverting inclusion criteria into exclusions.

    The selected criteria are listed as numbered "[name] value" lines
    (numbered from 1) so the LLM can reference each source criterion by
    number in its answer. An empty selection leaves that list blank.

    Args:
        selected_inclusion_criteria: List of inclusion criteria dicts to
            invert, read via the "criteria_name" and "criteria_value" keys

    Returns:
        Formatted system message string
    """
    numbered_criteria = [
        f"{number}. [{criterion.get('criteria_name', 'Unknown')}] {criterion.get('criteria_value', '')}\n"
        for number, criterion in enumerate(selected_inclusion_criteria, start=1)
    ]
    criteria_formatted = "".join(numbered_criteria)

    return f"""You are a systematic review methodology expert.

Your task is to generate exclusion criteria by inverting the following inclusion criteria.

INCLUSION CRITERIA TO INVERT:
{criteria_formatted}

INSTRUCTIONS:
1. For each inclusion criterion, generate the logical inverse as an exclusion criterion
2. Be comprehensive - cover all cases that would NOT meet the inclusion criterion
3. Use clear, specific language that can be assessed from title/abstract
4. Consider edge cases and ambiguous situations

INVERSION EXAMPLES:
- "Adults (>=18 years)" -> "Pediatric populations (children, adolescents <18 years)"
- "Randomized controlled trials" -> "Non-randomized studies, observational studies, case reports"
- "Studies with >=50 participants" -> "Studies with <50 participants or sample size not reported"
- "English language publications" -> "Non-English language publications"
- "Published 2015 or later" -> "Published before 2015"

OUTPUT FORMAT:
For each inclusion criterion, provide:
- Source: The original inclusion criterion (reference by number)
- Exclusion: The inverted exclusion criterion text
- Category: The PICO(S) category
- Confidence: HIGH, MEDIUM, or LOW (based on how clearly the inversion applies)
- Notes: Any considerations for edge cases or ambiguous situations"""