"""Tests for screening formatting utilities."""
import random


from crystallise.screening.formatting import (
    format_clusters,
    format_inc_exc,
    format_paper_text,
    format_questions,
)


class TestFormatPaperText:
    """Exercise ``format_paper_text`` against a range of row dictionaries."""

    def test_valid_dict_returns_xml_tagged_text(self):
        """A fully populated row yields text containing each tag and value."""
        formatted = format_paper_text(
            {"title": "Test Title", "abstract": "Test Abstract", "citation": "J Med 2024"}
        )
        assert formatted is not None
        expected_fragments = (
            "<title>",
            "Test Title",
            "<abstract>",
            "Test Abstract",
            "<citation>",
        )
        for fragment in expected_fragments:
            assert fragment in formatted

    def test_empty_values_returns_none(self):
        """A row whose fields are all empty strings is expected to give None."""
        assert format_paper_text({"title": "", "abstract": "", "citation": ""}) is None

    def test_all_whitespace_values_returns_none(self):
        """A row whose fields hold only spaces is expected to give None."""
        blank_row = {"title": "   ", "abstract": "  ", "citation": " "}
        assert format_paper_text(blank_row) is None

    def test_ignores_non_string_values(self):
        """Only the string-valued field should show up in the output."""
        mixed_row = {"title": "Valid Title", "abstract": 12345, "citation": None}
        formatted = format_paper_text(mixed_row)
        assert formatted is not None
        for fragment in ("<title>", "Valid Title"):
            assert fragment in formatted
        # The int and None fields must not produce tags of their own.
        for skipped_tag in ("<abstract>", "<citation>"):
            assert skipped_tag not in formatted

    def test_case_insensitive_keys(self):
        """Keys written in title case or upper case are still picked up."""
        formatted = format_paper_text({"Title": "My Title", "ABSTRACT": "My Abstract"})
        assert formatted is not None
        for value in ("My Title", "My Abstract"):
            assert value in formatted

    def test_missing_keys_handled(self):
        """A row carrying only a title still formats successfully."""
        formatted = format_paper_text({"title": "Only Title"})
        assert formatted is not None
        for fragment in ("<title>", "Only Title"):
            assert fragment in formatted


class TestFormatQuestions:
    """Tests for ``format_questions``: numbering and the empty-input fallback."""

    def test_list_returns_numbered_text(self):
        """Both questions and their ordinal markers appear in the output."""
        questions = ["What is X?", "How does Y work?"]
        # Seed for reproducibility (presumably the formatter draws on the
        # global RNG -- confirm), but snapshot and restore the RNG state so
        # this test does not silently reseed every test that runs after it.
        saved_state = random.getstate()
        random.seed(42)
        try:
            result = format_questions(questions)
        finally:
            random.setstate(saved_state)
        assert "1." in result
        assert "2." in result
        assert "What is X?" in result
        assert "How does Y work?" in result

    def test_empty_list_returns_fallback(self):
        """An empty list is expected to produce the fixed fallback message."""
        result = format_questions([])
        assert result == "(No research questions provided)"

    def test_none_returns_fallback(self):
        """``None`` is expected to produce the same fallback message."""
        result = format_questions(None)
        assert result == "(No research questions provided)"

    def test_single_question(self):
        """A single question is numbered ``1.`` and included verbatim."""
        result = format_questions(["Only question?"])
        assert "1." in result
        assert "Only question?" in result


class TestFormatIncExc:
    """Tests for ``format_inc_exc`` on nested include/exclude criteria dicts."""

    def test_nested_dict_returns_formatted_string(self):
        """Every include and exclude item from both sections is rendered."""
        inc_exc = {
            "Population": {
                "include": ["Adults 18+"],
                "exclude": ["Children under 18"],
            },
            "Study Design": {
                "include": ["RCTs"],
                "exclude": ["Case reports"],
            },
        }
        # Seed for reproducibility (presumably the formatter draws on the
        # global RNG -- confirm), but snapshot and restore the RNG state so
        # this test does not silently reseed every test that runs after it.
        saved_state = random.getstate()
        random.seed(42)
        try:
            result = format_inc_exc(inc_exc)
        finally:
            random.setstate(saved_state)
        assert "Include:" in result
        assert "Exclude:" in result
        assert "Adults 18+" in result
        assert "Children under 18" in result
        assert "RCTs" in result
        assert "Case reports" in result

    def test_include_only_section(self):
        """A section with only ``include`` items must not emit ``Exclude:``."""
        inc_exc = {"Population": {"include": ["Adults"]}}
        result = format_inc_exc(inc_exc)
        assert "Include:" in result
        assert "Adults" in result
        assert "Exclude:" not in result

    def test_exclude_only_section(self):
        """A section with only ``exclude`` items still renders them."""
        inc_exc = {"Population": {"exclude": ["Animals"]}}
        result = format_inc_exc(inc_exc)
        assert "Exclude:" in result
        assert "Animals" in result

    def test_empty_dict(self):
        """An empty criteria dict is expected to format to the empty string."""
        result = format_inc_exc({})
        assert result == ""


class TestFormatClusters:
    """Exercise ``format_clusters`` on lists of cluster dictionaries."""

    def test_cluster_list_returns_numbered_text(self):
        """Two clusters are rendered with numbers, names and a description."""
        first_cluster = {
            "cluster_name": "Treatment Efficacy",
            "cluster_description": "Papers on treatment outcomes",
        }
        second_cluster = {
            "cluster_name": "Study Design",
            "cluster_description": "Papers on methodology",
        }
        rendered = format_clusters([first_cluster, second_cluster])
        expected_fragments = (
            "Cluster number:\n1",
            "Cluster number:\n2",
            "Treatment Efficacy",
            "Study Design",
            "Papers on treatment outcomes",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_empty_cluster_list(self):
        """An empty cluster list is expected to format to the empty string."""
        assert format_clusters([]) == ""

    def test_missing_fields_handled(self):
        """A cluster lacking a description still has its name rendered."""
        rendered = format_clusters([{"cluster_name": "Name Only"}])
        assert "Name Only" in rendered
